diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 9c333d133c303..60e331af98398 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -276,7 +276,6 @@ CONFIG_BRIDGE_EBT_T_NAT=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BPFILTER=y CONFIG_IP_SCTP=m CONFIG_RDS=y CONFIG_L2TP=m diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7f0a7b97ef4ce..b418333bb0863 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -779,7 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; - s16 branch_oc_off = insn->off; + s32 branch_oc_off = insn->off; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; int last, insn_count = 1; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bdacbb84456d9..fe30b9ebb8de4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -307,6 +307,25 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) *pprog = prog; } +static void emit_nops(u8 **pprog, int len) +{ + u8 *prog = *pprog; + int i, noplen; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(x86_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; +} + /* * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT * in arch/x86/kernel/alternative.c @@ -385,8 +404,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ - memcpy(prog, x86_nops[5], X86_PATCH_SIZE); - prog += X86_PATCH_SIZE; + emit_nops(&prog, X86_PATCH_SIZE); if (!ebpf_from_cbpf) { if (tail_call_reachable && !is_subprog) /* When it's the entry of the whole tailcall context, @@ -692,8 +710,7 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog, if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); - memcpy(prog, x86_nops[5], X86_PATCH_SIZE); - prog += X86_PATCH_SIZE; + emit_nops(&prog, X86_PATCH_SIZE); /* out: */ ctx->tail_call_direct_label = prog - start; @@ -1055,25 +1072,6 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt, } } -static void emit_nops(u8 **pprog, int len) -{ - u8 *prog = *pprog; - int i, noplen; - - while (len > 0) { - noplen = len; - - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - - for (i = 0; i < noplen; i++) - EMIT1(x86_nops[noplen][i]); - len -= noplen; - } - - *pprog = prog; -} - /* emit the 3-byte VEX prefix * * r: same as rex.r, extra bit for ModRM reg field @@ -2700,8 +2698,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); im->ip_after_call = image + (prog - (u8 *)rw_image); - memcpy(prog, x86_nops[5], X86_PATCH_SIZE); - prog += X86_PATCH_SIZE; + emit_nops(&prog, X86_PATCH_SIZE); } if (fmod_ret->nr_links) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7671530d6e4e0..e30100597d0a9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1449,6 +1449,7 @@ struct bpf_prog_aux { bool dev_bound; /* Program is bound to the netdev. */ bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index bb1223b213087..aaf004d943228 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -11,6 +11,7 @@ struct bpf_mem_caches; struct bpf_mem_alloc { struct bpf_mem_caches __percpu *caches; struct bpf_mem_cache __percpu *cache; + struct obj_cgroup *objcg; bool percpu; struct work_struct work; }; @@ -21,8 +22,15 @@ struct bpf_mem_alloc { * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects. * Alloc and free are done with bpf_mem_{alloc,free}() and the size of * the returned object is given by the size argument of bpf_mem_alloc(). + * If percpu equals true, error will be returned in order to avoid + * large memory consumption and the below bpf_mem_alloc_percpu_unit_init() + * should be used to do on-demand per-cpu allocation for each size. */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); +/* Initialize a non-fix-size percpu memory allocator */ +int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg); +/* The percpu allocation with a specific unit size. */ +int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); /* kmalloc/kfree equivalent: */ diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h deleted file mode 100644 index 736ded4905e09..0000000000000 --- a/include/linux/bpfilter.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BPFILTER_H -#define _LINUX_BPFILTER_H - -#include -#include -#include - -struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen); -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen); - -struct bpfilter_umh_ops { - struct umd_info info; - /* since ip_getsockopt() can run in parallel, serialize access to umh */ - struct mutex lock; - int (*sockopt)(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set); - int (*start)(void); -}; -extern struct bpfilter_umh_ops bpfilter_ops; -#endif diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c953b8c0d2f43..888a4b217829f 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -100,6 +100,11 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); + /* psock_update_sk_prot may be called with restore=false many times + * so the handler must be safe for this case. It will be called + * exactly once with restore=true when the psock is being destroyed + * and psock refcnt is zero, but before an RCU grace period. + */ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); struct proto *sk_proto; diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h deleted file mode 100644 index cbc1f5813f505..0000000000000 --- a/include/uapi/linux/bpfilter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_BPFILTER_H -#define _UAPI_LINUX_BPFILTER_H - -#include - -enum { - BPFILTER_IPT_SO_SET_REPLACE = 64, - BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, - BPFILTER_IPT_SET_MAX, -}; - -enum { - BPFILTER_IPT_SO_GET_INFO = 64, - BPFILTER_IPT_SO_GET_ENTRIES = 65, - BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, - BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, - BPFILTER_IPT_GET_MAX, -}; - -#endif /* _UAPI_LINUX_BPFILTER_H */ diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index aa0fbf000a12b..550f02e2cb132 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -121,6 +121,8 @@ struct bpf_mem_caches { struct bpf_mem_cache cache[NUM_CACHES]; }; +static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + static struct llist_node notrace *__llist_del_first(struct llist_head *head) { struct llist_node *entry, *next; @@ -462,11 +464,17 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c) * consume ~ 11 Kbyte per cpu. * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. + * + * Percpu allocation is typically rare. To avoid potential unnecessary large + * memory consumption, set low_mark = 1 and high_mark = 3, resulting in c->batch = 1. */ static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); - if (c->unit_size <= 256) { + if (c->percpu_size) { + c->low_watermark = 1; + c->high_watermark = 3; + } else if (c->unit_size <= 256) { c->low_watermark = 32; c->high_watermark = 96; } else { @@ -483,11 +491,16 @@ static void init_refill_work(struct bpf_mem_cache *c) static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) { - /* To avoid consuming memory assume that 1st run of bpf - * prog won't be doing more than 4 map_update_elem from - * irq disabled region + int cnt = 1; + + /* To avoid consuming memory, for non-percpu allocation, assume that + * 1st run of bpf prog won't be doing more than 4 map_update_elem from + * irq disabled region if unit size is less than or equal to 256. + * For all other cases, let us just do one allocation. */ - alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); + if (!c->percpu_size && c->unit_size <= 256) + cnt = 4; + alloc_bulk(c, cnt, cpu_to_node(cpu), false); } /* When size != 0 bpf_mem_cache for each cpu. @@ -499,12 +512,14 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { - static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; int cpu, i, unit_size, percpu_size = 0; + if (percpu && size == 0) + return -EINVAL; + /* room for llist_node and per-cpu pointer */ if (percpu) percpu_size = LLIST_NODE_SZ + sizeof(void *); @@ -523,6 +538,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) if (memcg_bpf_enabled()) objcg = get_obj_cgroup_from_current(); #endif + ma->objcg = objcg; + for_each_possible_cpu(cpu) { c = per_cpu_ptr(pc, cpu); c->unit_size = unit_size; @@ -542,6 +559,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif + ma->objcg = objcg; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); for (i = 0; i < NUM_CACHES; i++) { @@ -560,6 +578,56 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) return 0; } +int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg) +{ + struct bpf_mem_caches __percpu *pcc; + + pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL); + if (!pcc) + return -ENOMEM; + + ma->caches = pcc; + ma->objcg = objcg; + ma->percpu = true; + return 0; +} + +int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size) +{ + struct bpf_mem_caches *cc, __percpu *pcc; + int cpu, i, unit_size, percpu_size; + struct obj_cgroup *objcg; + struct bpf_mem_cache *c; + + i = bpf_mem_cache_idx(size); + if (i < 0) + return -EINVAL; + + /* room for llist_node and per-cpu pointer */ + percpu_size = LLIST_NODE_SZ + sizeof(void *); + + unit_size = sizes[i]; + objcg = ma->objcg; + pcc = ma->caches; + + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(pcc, cpu); + c = &cc->cache[i]; + if (c->unit_size) + break; + + c->unit_size = unit_size; + c->objcg = objcg; + c->percpu_size = percpu_size; + c->tgt = c; + + init_refill_work(c); + prefill_mem_cache(c, cpu); + } + + return 0; +} + static void drain_mem_cache(struct bpf_mem_cache *c) { bool percpu = !!c->percpu_size; @@ -691,9 +759,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } - /* objcg is the same across cpus */ - if (c->objcg) - obj_cgroup_put(c->objcg); + if (ma->objcg) + obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } if (ma->caches) { @@ -709,8 +776,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } } - if (c->objcg) - obj_cgroup_put(c->objcg); + if (ma->objcg) + obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } } @@ -833,7 +900,9 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) if (!size) return NULL; - idx = bpf_mem_cache_idx(size + LLIST_NODE_SZ); + if (!ma->percpu) + size += LLIST_NODE_SZ; + idx = bpf_mem_cache_idx(size); if (idx < 0) return NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1bf9805ee185c..a1f18681721c7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2738,6 +2738,22 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) goto free_prog_sec; } + /* + * Bookkeeping for managing the program attachment chain. + * + * It might be tempting to set attach_tracing_prog flag at the attachment + * time, but this will not prevent from loading bunch of tracing prog + * first, then attach them one to another. + * + * The flag attach_tracing_prog is set for the whole program lifecycle, and + * doesn't have to be cleared in bpf_tracing_link_release, since tracing + * programs cannot change attachment target. + */ + if (type == BPF_PROG_TYPE_TRACING && dst_prog && + dst_prog->type == BPF_PROG_TYPE_TRACING) { + prog->aux->attach_tracing_prog = true; + } + /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) @@ -3171,7 +3187,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, } if (tgt_prog_fd) { - /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ + /* + * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this + * part would be changed to implement the same for + * BPF_PROG_TYPE_TRACING, do not forget to update the way how + * attach_tracing_prog flag is set. + */ if (prog->type != BPF_PROG_TYPE_EXT) { err = -EINVAL; goto out_put_prog; @@ -3216,6 +3237,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, * * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program * was detached and is going for re-attachment. + * + * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf + * are NULL, then program was already attached and user did not provide + * tgt_prog_fd so we have no way to find out or create trampoline */ if (!prog->aux->dst_trampoline && !tgt_prog) { /* @@ -3229,6 +3254,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, err = -EINVAL; goto out_unlock; } + /* We can allow re-attach only if we have valid attach_btf. */ + if (!prog->aux->attach_btf) { + err = -EINVAL; + goto out_unlock; + } btf_id = prog->aux->attach_btf_id; key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a376eb609c414..adbf330d364bb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -195,6 +195,8 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512 + static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); @@ -7279,12 +7281,10 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, return -EACCES; } - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; + if (reg->umin_value == 0 && !zero_size_allowed) { + verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n", + regno, reg->umin_value, reg->umax_value); + return -EACCES; } if (reg->umax_value >= BPF_MAX_VAR_SIZ) { @@ -12141,20 +12141,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) return -ENOMEM; - if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { - if (!bpf_global_percpu_ma_set) { - mutex_lock(&bpf_percpu_ma_lock); - if (!bpf_global_percpu_ma_set) { - err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); - if (!err) - bpf_global_percpu_ma_set = true; - } - mutex_unlock(&bpf_percpu_ma_lock); - if (err) - return err; - } - } - if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); return -EINVAL; @@ -12175,6 +12161,35 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { + if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) { + verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n", + ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE); + return -EINVAL; + } + + if (!bpf_global_percpu_ma_set) { + mutex_lock(&bpf_percpu_ma_lock); + if (!bpf_global_percpu_ma_set) { + /* Charge memory allocated with bpf_global_percpu_ma to + * root memcg. The obj_cgroup for root memcg is NULL. + */ + err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL); + if (!err) + bpf_global_percpu_ma_set = true; + } + mutex_unlock(&bpf_percpu_ma_lock); + if (err) + return err; + } + + mutex_lock(&bpf_percpu_ma_lock); + err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size); + mutex_unlock(&bpf_percpu_ma_lock); + if (err) + return err; + } + struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) { @@ -20302,6 +20317,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, struct bpf_attach_target_info *tgt_info) { bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; + bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; const char prefix[] = "btf_trace_"; int ret = 0, subprog = -1, i; const struct btf_type *t; @@ -20372,10 +20388,21 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, bpf_log(log, "Can attach to only JITed progs\n"); return -EINVAL; } - if (tgt_prog->type == prog->type) { - /* Cannot fentry/fexit another fentry/fexit program. - * Cannot attach program extension to another extension. - * It's ok to attach fentry/fexit to extension program. + if (prog_tracing) { + if (aux->attach_tracing_prog) { + /* + * Target program is an fentry/fexit which is already attached + * to another tracing program. More levels of nesting + * attachment are not allowed. + */ + bpf_log(log, "Cannot nest tracing program attach more than once\n"); + return -EINVAL; + } + } else if (tgt_prog->type == prog->type) { + /* + * To avoid potential call chain cycles, prevent attaching of a + * program extension to another extension. It's ok to attach + * fentry/fexit to extension program. */ bpf_log(log, "Cannot recursively attach\n"); return -EINVAL; @@ -20388,16 +20415,15 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, * except fentry/fexit. The reason is the following. * The fentry/fexit programs are used for performance * analysis, stats and can be attached to any program - * type except themselves. When extension program is - * replacing XDP function it is necessary to allow - * performance analysis of all functions. Both original - * XDP program and its program extension. Hence - * attaching fentry/fexit to BPF_PROG_TYPE_EXT is - * allowed. If extending of fentry/fexit was allowed it - * would be possible to create long call chain - * fentry->extension->fentry->extension beyond - * reasonable stack size. Hence extending fentry is not - * allowed. + * type. When extension program is replacing XDP function + * it is necessary to allow performance analysis of all + * functions. Both original XDP program and its program + * extension. Hence attaching fentry/fexit to + * BPF_PROG_TYPE_EXT is allowed. If extending of + * fentry/fexit was allowed it would be possible to create + * long call chain fentry->extension->fentry->extension + * beyond reasonable stack size. Hence extending fentry + * is not allowed. */ bpf_log(log, "Cannot extend fentry/fexit\n"); return -EINVAL; diff --git a/net/Kconfig b/net/Kconfig index 3ec6bc98fa05d..4adc47d0c9c2a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -233,8 +233,6 @@ source "net/bridge/netfilter/Kconfig" endif -source "net/bpfilter/Kconfig" - source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4c4dc535453df..b06b5539e7a6b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ obj-y += ipv6/ -obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore deleted file mode 100644 index f34e85ee82044..0000000000000 --- a/net/bpfilter/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -bpfilter_umh diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig deleted file mode 100644 index 3d4a214624583..0000000000000 --- a/net/bpfilter/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menuconfig BPFILTER - bool "BPF based packet filtering framework (BPFILTER)" - depends on BPF && INET - select USERMODE_DRIVER - help - This builds experimental bpfilter framework that is aiming to - provide netfilter compatible functionality via BPF - -if BPFILTER -config BPFILTER_UMH - tristate "bpfilter kernel module with user mode helper" - depends on CC_CAN_LINK - depends on m || CC_CAN_LINK_STATIC - default m - help - This builds bpfilter kernel module with embedded user mode helper - - Note: To compile this as built-in, your toolchain must support - building static binaries, since rootfs isn't mounted at the time - when __init functions are called and do_execv won't be able to find - the elf interpreter. -endif diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile deleted file mode 100644 index cdac82b8c53af..0000000000000 --- a/net/bpfilter/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Linux BPFILTER layer. -# - -userprogs := bpfilter_umh -bpfilter_umh-objs := main.o -userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi - -ifeq ($(CONFIG_BPFILTER_UMH), y) -# builtin bpfilter_umh should be linked with -static -# since rootfs isn't mounted at the time of __init -# function is called and do_execv won't find elf interpreter -userldflags += -static -endif - -$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh - -obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o -bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c deleted file mode 100644 index 97e129e3f31c0..0000000000000 --- a/net/bpfilter/bpfilter_kern.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include "msgfmt.h" - -extern char bpfilter_umh_start; -extern char bpfilter_umh_end; - -static void shutdown_umh(void) -{ - struct umd_info *info = &bpfilter_ops.info; - struct pid *tgid = info->tgid; - - if (tgid) { - kill_pid(tgid, SIGKILL, 1); - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_cleanup_helper(info); - } -} - -static void __stop_umh(void) -{ - if (IS_ENABLED(CONFIG_INET)) - shutdown_umh(); -} - -static int bpfilter_send_req(struct mbox_request *req) -{ - struct mbox_reply reply; - loff_t pos = 0; - ssize_t n; - - if (!bpfilter_ops.info.tgid) - return -EFAULT; - pos = 0; - n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req), - &pos); - if (n != sizeof(*req)) { - pr_err("write fail %zd\n", n); - goto stop; - } - pos = 0; - n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), - &pos); - if (n != sizeof(reply)) { - pr_err("read fail %zd\n", n); - goto stop; - } - return reply.status; -stop: - __stop_umh(); - return -EFAULT; -} - -static int bpfilter_process_sockopt(struct sock *sk, int optname, - sockptr_t optval, unsigned int optlen, - bool is_set) -{ - struct mbox_request req = { - .is_set = is_set, - .pid = current->pid, - .cmd = optname, - .addr = (uintptr_t)optval.user, - .len = optlen, - }; - if (sockptr_is_kernel(optval)) { - pr_err("kernel access not supported\n"); - return -EFAULT; - } - return bpfilter_send_req(&req); -} - -static int start_umh(void) -{ - struct mbox_request req = { .pid = current->pid }; - int err; - - /* fork usermode process */ - err = fork_usermode_driver(&bpfilter_ops.info); - if (err) - return err; - pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); - - /* health check that usermode process started correctly */ - if (bpfilter_send_req(&req) != 0) { - shutdown_umh(); - return -EFAULT; - } - - return 0; -} - -static int __init load_umh(void) -{ - int err; - - err = umd_load_blob(&bpfilter_ops.info, - &bpfilter_umh_start, - &bpfilter_umh_end - &bpfilter_umh_start); - if (err) - return err; - - mutex_lock(&bpfilter_ops.lock); - err = start_umh(); - if (!err && IS_ENABLED(CONFIG_INET)) { - bpfilter_ops.sockopt = &bpfilter_process_sockopt; - bpfilter_ops.start = &start_umh; - } - mutex_unlock(&bpfilter_ops.lock); - if (err) - umd_unload_blob(&bpfilter_ops.info); - return err; -} - -static void __exit fini_umh(void) -{ - mutex_lock(&bpfilter_ops.lock); - if (IS_ENABLED(CONFIG_INET)) { - shutdown_umh(); - bpfilter_ops.start = NULL; - bpfilter_ops.sockopt = NULL; - } - mutex_unlock(&bpfilter_ops.lock); - - umd_unload_blob(&bpfilter_ops.info); -} -module_init(load_umh); -module_exit(fini_umh); -MODULE_LICENSE("GPL"); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S deleted file mode 100644 index 40311d10d2f27..0000000000000 --- a/net/bpfilter/bpfilter_umh_blob.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" - .global bpfilter_umh_start -bpfilter_umh_start: - .incbin "net/bpfilter/bpfilter_umh" - .global bpfilter_umh_end -bpfilter_umh_end: diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c deleted file mode 100644 index 291a925462463..0000000000000 --- a/net/bpfilter/main.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include "../../include/uapi/linux/bpf.h" -#include -#include "msgfmt.h" - -FILE *debug_f; - -static int handle_get_cmd(struct mbox_request *cmd) -{ - switch (cmd->cmd) { - case 0: - return 0; - default: - break; - } - return -ENOPROTOOPT; -} - -static int handle_set_cmd(struct mbox_request *cmd) -{ - return -ENOPROTOOPT; -} - -static void loop(void) -{ - while (1) { - struct mbox_request req; - struct mbox_reply reply; - int n; - - n = read(0, &req, sizeof(req)); - if (n != sizeof(req)) { - fprintf(debug_f, "invalid request %d\n", n); - return; - } - - reply.status = req.is_set ? - handle_set_cmd(&req) : - handle_get_cmd(&req); - - n = write(1, &reply, sizeof(reply)); - if (n != sizeof(reply)) { - fprintf(debug_f, "reply failed %d\n", n); - return; - } - } -} - -int main(void) -{ - debug_f = fopen("/dev/kmsg", "w"); - setvbuf(debug_f, 0, _IOLBF, 0); - fprintf(debug_f, "<5>Started bpfilter\n"); - loop(); - fclose(debug_f); - return 0; -} diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h deleted file mode 100644 index 98d121c629450..0000000000000 --- a/net/bpfilter/msgfmt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_BPFILTER_MSGFMT_H -#define _NET_BPFILTER_MSGFMT_H - -struct mbox_request { - __u64 addr; - __u32 len; - __u32 is_set; - __u32 cmd; - __u32 pid; -}; - -struct mbox_reply { - __u32 status; -}; - -#endif diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index e144a02a6a610..ec36d2ec059e8 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -16,8 +16,6 @@ obj-y := route.o inetpeer.o protocol.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ metrics.o netlink.o nexthop.o udp_tunnel_stub.o -obj-$(CONFIG_BPFILTER) += bpfilter/ - obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile deleted file mode 100644 index 00af5305e05a1..0000000000000 --- a/net/ipv4/bpfilter/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BPFILTER) += sockopt.o diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c deleted file mode 100644 index 193bcc2acccc9..0000000000000 --- a/net/ipv4/bpfilter/sockopt.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct bpfilter_umh_ops bpfilter_ops; -EXPORT_SYMBOL_GPL(bpfilter_ops); - -static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set) -{ - int err; - mutex_lock(&bpfilter_ops.lock); - if (!bpfilter_ops.sockopt) { - mutex_unlock(&bpfilter_ops.lock); - request_module("bpfilter"); - mutex_lock(&bpfilter_ops.lock); - - if (!bpfilter_ops.sockopt) { - err = -ENOPROTOOPT; - goto out; - } - } - if (bpfilter_ops.info.tgid && - thread_group_exited(bpfilter_ops.info.tgid)) - umd_cleanup_helper(&bpfilter_ops.info); - - if (!bpfilter_ops.info.tgid) { - err = bpfilter_ops.start(); - if (err) - goto out; - } - err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set); -out: - mutex_unlock(&bpfilter_ops.lock); - return err; -} - -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen) -{ - return bpfilter_mbox_request(sk, optname, optval, optlen, true); -} - -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen) -{ - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, - false); -} - -static int __init bpfilter_sockopt_init(void) -{ - mutex_init(&bpfilter_ops.lock); - bpfilter_ops.info.tgid = NULL; - bpfilter_ops.info.driver_name = "bpfilter_umh"; - - return 0; -} -device_initcall(bpfilter_sockopt_init); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 66247e8b429e4..7aa9dc0e6760d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,8 +47,6 @@ #include #include -#include - /* * SOL_IP control messages. */ @@ -1411,11 +1409,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_SET_REPLACE && - optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1763,11 +1756,6 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7ea7c3a0d0d06..bd84785bf8d6c 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -161,15 +161,30 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { struct sock *sk_pair; + /* Restore does not decrement the sk_pair reference yet because we must + * keep the a reference to the socket until after an RCU grace period + * and any pending sends have completed. + */ if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } - sk_pair = unix_peer(sk); - sock_hold(sk_pair); - psock->sk_pair = sk_pair; + /* psock_update_sk_prot can be called multiple times if psock is + * added to multiple maps and/or slots in the same map. There is + * also an edge case where replacing a psock with itself can trigger + * an extra psock_update_sk_prot during the insert process. So it + * must be safe to do multiple calls. Here we need to ensure we don't + * increment the refcnt through sock_hold many times. There will only + * be a single matching destroy operation. + */ + if (!psock->sk_pair) { + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; + } + unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index edda4fc2c4d03..708733b0ea061 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -426,10 +426,6 @@ static void probe_kernel_image_config(const char *define_prefix) { "CONFIG_BPF_STREAM_PARSER", }, /* xt_bpf module for passing BPF programs to netfilter */ { "CONFIG_NETFILTER_XT_MATCH_BPF", }, - /* bpfilter back-end for iptables */ - { "CONFIG_BPFILTER", }, - /* bpftilter module with "user mode helper" */ - { "CONFIG_BPFILTER_UMH", }, /* test_bpf module for BPF tests */ { "CONFIG_TEST_BPF", }, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ebcfb2147fbd8..c5a42ac309fdb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1503,6 +1503,16 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } +static int create_placeholder_fd(void) +{ + int fd; + + fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + if (fd < 0) + return -errno; + return fd; +} + static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) { struct bpf_map *map; @@ -1515,7 +1525,21 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) map = &obj->maps[obj->nr_maps++]; map->obj = obj; - map->fd = -1; + /* Preallocate map FD without actually creating BPF map just yet. + * These map FD "placeholders" will be reused later without changing + * FD value when map is actually created in the kernel. + * + * This is useful to be able to perform BPF program relocations + * without having to create BPF maps before that step. This allows us + * to finalize and load BTF very late in BPF object's loading phase, + * right before BPF maps have to be created and BPF programs have to + * be loaded. By having these map FD placeholders we can perform all + * the sanitizations, relocations, and any other adjustments before we + * start creating actual BPF kernel objects (BTF, maps, progs). + */ + map->fd = create_placeholder_fd(); + if (map->fd < 0) + return ERR_PTR(map->fd); map->inner_map_fd = -1; map->autocreate = true; @@ -2607,7 +2631,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map->inner_map = calloc(1, sizeof(*map->inner_map)); if (!map->inner_map) return -ENOMEM; - map->inner_map->fd = -1; + map->inner_map->fd = create_placeholder_fd(); + if (map->inner_map->fd < 0) + return map->inner_map->fd; map->inner_map->sec_idx = sec_idx; map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); if (!map->inner_map->name) @@ -3166,86 +3192,6 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } } - if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) - goto skip_exception_cb; - for (i = 0; i < obj->nr_programs; i++) { - struct bpf_program *prog = &obj->programs[i]; - int j, k, n; - - if (prog_is_subprog(obj, prog)) - continue; - n = btf__type_cnt(obj->btf); - for (j = 1; j < n; j++) { - const char *str = "exception_callback:", *name; - size_t len = strlen(str); - struct btf_type *t; - - t = btf_type_by_id(obj->btf, j); - if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) - continue; - - name = btf__str_by_offset(obj->btf, t->name_off); - if (strncmp(name, str, len)) - continue; - - t = btf_type_by_id(obj->btf, t->type); - if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { - pr_warn("prog '%s': exception_callback: decl tag not applied to the main program\n", - prog->name); - return -EINVAL; - } - if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) - continue; - /* Multiple callbacks are specified for the same prog, - * the verifier will eventually return an error for this - * case, hence simply skip appending a subprog. - */ - if (prog->exception_cb_idx >= 0) { - prog->exception_cb_idx = -1; - break; - } - - name += len; - if (str_is_empty(name)) { - pr_warn("prog '%s': exception_callback: decl tag contains empty value\n", - prog->name); - return -EINVAL; - } - - for (k = 0; k < obj->nr_programs; k++) { - struct bpf_program *subprog = &obj->programs[k]; - - if (!prog_is_subprog(obj, subprog)) - continue; - if (strcmp(name, subprog->name)) - continue; - /* Enforce non-hidden, as from verifier point of - * view it expects global functions, whereas the - * mark_btf_static fixes up linkage as static. - */ - if (!subprog->sym_global || subprog->mark_btf_static) { - pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", - prog->name, subprog->name); - return -EINVAL; - } - /* Let's see if we already saw a static exception callback with the same name */ - if (prog->exception_cb_idx >= 0) { - pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", - prog->name, subprog->name); - return -EINVAL; - } - prog->exception_cb_idx = k; - break; - } - - if (prog->exception_cb_idx >= 0) - continue; - pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); - return -ENOENT; - } - } -skip_exception_cb: - sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -4549,14 +4495,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) goto err_free_new_name; } - err = zclose(map->fd); - if (err) { - err = -errno; - goto err_close_new_fd; - } + err = reuse_fd(map->fd, new_fd); + if (err) + goto err_free_new_name; + free(map->name); - map->fd = new_fd; map->name = new_name; map->def.type = info.type; map->def.key_size = info.key_size; @@ -4570,8 +4514,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) return 0; -err_close_new_fd: - close(new_fd); err_free_new_name: free(new_name); return libbpf_err(err); @@ -5200,12 +5142,17 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->loaded || map->reused; +} + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - int err = 0; + int err = 0, map_fd; if (kernel_supports(obj, FEAT_PROG_NAME)) map_name = map->name; @@ -5231,7 +5178,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->name, err); return err; } - map->inner_map_fd = bpf_map__fd(map->inner_map); + map->inner_map_fd = map->inner_map->fd; } if (map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; @@ -5264,17 +5211,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b bpf_gen__map_create(obj->gen_loader, def->type, map_name, def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); - /* Pretend to have valid FD to pass various fd >= 0 checks. - * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + /* We keep pretenting we have valid FD to pass various fd >= 0 + * checks by just keeping original placeholder FDs in place. + * See bpf_object__add_map() comment. + * This placeholder fd will not be used with any syscall and + * will be reset to -1 eventually. */ - map->fd = 0; + map_fd = map->fd; } else { - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - def->max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - if (map->fd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { + if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; err = -errno; @@ -5286,13 +5235,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - def->max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - err = map->fd < 0 ? -errno : 0; - if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) map->inner_map->fd = -1; @@ -5300,7 +5247,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return err; + if (map_fd < 0) + return map_fd; + + /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ + if (map->fd == map_fd) + return 0; + + /* Keep placeholder FD value but now point it to the BPF map object. + * This way everything that relied on this map's FD (e.g., relocated + * ldimm64 instructions) will stay valid and won't need adjustments. + * map->fd stays valid but now point to what map_fd points to. + */ + return reuse_fd(map->fd, map_fd); } static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -5314,7 +5273,7 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) continue; targ_map = map->init_slots[i]; - fd = bpf_map__fd(targ_map); + fd = targ_map->fd; if (obj->gen_loader) { bpf_gen__populate_outer_map(obj->gen_loader, @@ -5384,10 +5343,8 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) continue; err = init_prog_array_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) return err; - } } return 0; } @@ -5465,7 +5422,7 @@ bpf_object__create_maps(struct bpf_object *obj) } } - if (map->fd >= 0) { + if (map->reused) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { @@ -5478,25 +5435,20 @@ bpf_object__create_maps(struct bpf_object *obj) if (bpf_map__is_internal(map)) { err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } } if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { - zclose(map->fd); if (!retried && err == -EEXIST) { retried = true; goto retry; @@ -6229,7 +6181,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, int err; /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't - * supprot func/line info + * support func/line info */ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; @@ -6629,8 +6581,329 @@ static void bpf_object__sort_relos(struct bpf_object *obj) } } -static int -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) +static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *str = "exception_callback:"; + size_t pfx_len = strlen(str); + int i, j, n; + + if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) + return 0; + + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { + const char *name; + struct btf_type *t; + + t = btf_type_by_id(obj->btf, i); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, pfx_len) != 0) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback: decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += pfx_len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback: decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (j = 0; j < obj->nr_programs; j++) { + struct bpf_program *subprog = &obj->programs[j]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name) != 0) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = j; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + + return 0; +} + +static struct { + enum bpf_prog_type prog_type; + const char *ctx_name; +} global_ctx_map[] = { + { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, + { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, + { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, + { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, + { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, + { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, + { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, + { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, + { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, + { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, + { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, + { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, + { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, + { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, + { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, + { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, + { BPF_PROG_TYPE_XDP, "xdp_md" }, + /* all other program types don't have "named" context structs */ +}; + +static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) +{ + int fn_id, fn_proto_id, ret_type_id, orig_proto_id; + int i, err, arg_cnt, fn_name_off, linkage; + struct btf_type *fn_t, *fn_proto_t, *t; + struct btf_param *p; + + /* caller already validated FUNC -> FUNC_PROTO validity */ + fn_t = btf_type_by_id(btf, orig_fn_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* Note that each btf__add_xxx() operation invalidates + * all btf_type and string pointers, so we need to be + * very careful when cloning BTF types. BTF type + * pointers have to be always refetched. And to avoid + * problems with invalidated string pointers, we + * add empty strings initially, then just fix up + * name_off offsets in place. Offsets are stable for + * existing strings, so that works out. + */ + fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ + linkage = btf_func_linkage(fn_t); + orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ + ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ + arg_cnt = btf_vlen(fn_proto_t); + + /* clone FUNC_PROTO and its params */ + fn_proto_id = btf__add_func_proto(btf, ret_type_id); + if (fn_proto_id < 0) + return -EINVAL; + + for (i = 0; i < arg_cnt; i++) { + int name_off; + + /* copy original parameter data */ + t = btf_type_by_id(btf, orig_proto_id); + p = &btf_params(t)[i]; + name_off = p->name_off; + + err = btf__add_func_param(btf, "", p->type); + if (err) + return err; + + fn_proto_t = btf_type_by_id(btf, fn_proto_id); + p = &btf_params(fn_proto_t)[i]; + p->name_off = name_off; /* use remembered str offset */ + } + + /* clone FUNC now, btf__add_func() enforces non-empty name, so use + * entry program's name as a placeholder, which we replace immediately + * with original name_off + */ + fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); + if (fn_id < 0) + return -EINVAL; + + fn_t = btf_type_by_id(btf, fn_id); + fn_t->name_off = fn_name_off; /* reuse original string */ + + return fn_id; +} + +/* Check if main program or global subprog's function prototype has `arg:ctx` + * argument tags, and, if necessary, substitute correct type to match what BPF + * verifier would expect, taking into account specific program type. This + * allows to support __arg_ctx tag transparently on old kernels that don't yet + * have a native support for it in the verifier, making user's life much + * easier. + */ +static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *ctx_name = NULL, *ctx_tag = "arg:ctx"; + struct bpf_func_info_min *func_rec; + struct btf_type *fn_t, *fn_proto_t; + struct btf *btf = obj->btf; + const struct btf_type *t; + struct btf_param *p; + int ptr_id = 0, struct_id, tag_id, orig_fn_id; + int i, n, arg_idx, arg_cnt, err, rec_idx; + int *orig_ids; + + /* no .BTF.ext, no problem */ + if (!obj->btf_ext || !prog->func_info) + return 0; + + /* some BPF program types just don't have named context structs, so + * this fallback mechanism doesn't work for them + */ + for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { + if (global_ctx_map[i].prog_type != prog->type) + continue; + ctx_name = global_ctx_map[i].ctx_name; + break; + } + if (!ctx_name) + return 0; + + /* remember original func BTF IDs to detect if we already cloned them */ + orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); + if (!orig_ids) + return -ENOMEM; + for (i = 0; i < prog->func_info_cnt; i++) { + func_rec = prog->func_info + prog->func_info_rec_size * i; + orig_ids[i] = func_rec->type_id; + } + + /* go through each DECL_TAG with "arg:ctx" and see if it points to one + * of our subprogs; if yes and subprog is global and needs adjustment, + * clone and adjust FUNC -> FUNC_PROTO combo + */ + for (i = 1, n = btf__type_cnt(btf); i < n; i++) { + /* only DECL_TAG with "arg:ctx" value are interesting */ + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) + continue; + + /* only global funcs need adjustment, if at all */ + orig_fn_id = t->type; + fn_t = btf_type_by_id(btf, orig_fn_id); + if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) + continue; + + /* sanity check FUNC -> FUNC_PROTO chain, just in case */ + fn_proto_t = btf_type_by_id(btf, fn_t->type); + if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) + continue; + + /* find corresponding func_info record */ + func_rec = NULL; + for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { + if (orig_ids[rec_idx] == t->type) { + func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; + break; + } + } + /* current main program doesn't call into this subprog */ + if (!func_rec) + continue; + + /* some more sanity checking of DECL_TAG */ + arg_cnt = btf_vlen(fn_proto_t); + arg_idx = btf_decl_tag(t)->component_idx; + if (arg_idx < 0 || arg_idx >= arg_cnt) + continue; + + /* check if existing parameter already matches verifier expectations */ + p = &btf_params(fn_proto_t)[arg_idx]; + t = skip_mods_and_typedefs(btf, p->type, NULL); + if (btf_is_ptr(t) && + (t = skip_mods_and_typedefs(btf, t->type, NULL)) && + btf_is_struct(t) && + strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) { + continue; /* no need for fix up */ + } + + /* clone fn/fn_proto, unless we already did it for another arg */ + if (func_rec->type_id == orig_fn_id) { + int fn_id; + + fn_id = clone_func_btf_info(btf, orig_fn_id, prog); + if (fn_id < 0) { + err = fn_id; + goto err_out; + } + + /* point func_info record to a cloned FUNC type */ + func_rec->type_id = fn_id; + } + + /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; + * we do it just once per main BPF program, as all global + * funcs share the same program type, so need only PTR -> + * STRUCT type chain + */ + if (ptr_id == 0) { + struct_id = btf__add_struct(btf, ctx_name, 0); + ptr_id = btf__add_ptr(btf, struct_id); + if (ptr_id < 0 || struct_id < 0) { + err = -EINVAL; + goto err_out; + } + } + + /* for completeness, clone DECL_TAG and point it to cloned param */ + tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); + if (tag_id < 0) { + err = -EINVAL; + goto err_out; + } + + /* all the BTF manipulations invalidated pointers, refetch them */ + fn_t = btf_type_by_id(btf, func_rec->type_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* fix up type ID pointed to by param */ + p = &btf_params(fn_proto_t)[arg_idx]; + p->type = ptr_id; + } + + free(orig_ids); + return 0; +err_out: + free(orig_ids); + return err; +} + +static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i, j; @@ -6689,6 +6962,9 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } + err = bpf_prog_assign_exc_cb(obj, prog); + if (err) + return err; /* Now, also append exception callback if it has not been done already. */ if (prog->exception_cb_idx >= 0) { struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; @@ -6708,19 +6984,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } } } - /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; if (!prog->autoload) continue; + + /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", prog->name, err); return err; } + + /* Fix up .BTF.ext information, if necessary */ + err = bpf_program_fixup_func_info(obj, prog); + if (err) { + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", + prog->name, err); + return err; + } } return 0; @@ -7050,7 +7335,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(obj); + btf_fd = btf__fd(obj->btf); if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; @@ -7135,7 +7420,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog if (map->libbpf_type != LIBBPF_MAP_RODATA) continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + if (bpf_prog_bind_map(ret, map->fd, NULL)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': failed to bind map '%s': %s\n", prog->name, map->real_name, cp); @@ -8067,11 +8352,11 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); err = err ? : bpf_object_prepare_struct_ops(obj); @@ -8080,8 +8365,6 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch /* reset FDs */ if (obj->btf) btf__set_fd(obj->btf, -1); - for (i = 0; i < obj->nr_maps; i++) - obj->maps[i].fd = -1; if (!err) err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } @@ -9601,7 +9884,11 @@ int libbpf_attach_type_by_name(const char *name, int bpf_map__fd(const struct bpf_map *map) { - return map ? map->fd : libbpf_err(-EINVAL); + if (!map) + return libbpf_err(-EINVAL); + if (!map_is_created(map)) + return -1; + return map->fd; } static bool map_uses_real_name(const struct bpf_map *map) @@ -9637,7 +9924,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map) int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.type = type; return 0; @@ -9650,7 +9937,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map) int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.map_flags = flags; return 0; @@ -9663,7 +9950,7 @@ __u64 bpf_map__map_extra(const struct bpf_map *map) int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_extra = map_extra; return 0; @@ -9676,7 +9963,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map) int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->numa_node = numa_node; return 0; @@ -9689,7 +9976,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map) int bpf_map__set_key_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.key_size = size; return 0; @@ -9773,7 +10060,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map->obj->loaded || map->reused) return libbpf_err(-EBUSY); if (map->mmaped) { @@ -9814,8 +10101,11 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size) { + if (map->obj->loaded || map->reused) + return libbpf_err(-EBUSY); + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size || map->fd >= 0) + size != map->def.value_size) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); @@ -9842,7 +10132,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map) int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_ifindex = ifindex; return 0; @@ -9947,7 +10237,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) static int validate_map_op(const struct bpf_map *map, size_t key_sz, size_t value_sz, bool check_value_sz) { - if (map->fd <= 0) + if (!map_is_created(map)) /* map is not yet created */ return -ENOENT; if (map->def.key_size != key_sz) { @@ -12400,7 +12690,7 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) __u32 zero = 0; int err; - if (!bpf_map__is_struct_ops(map) || map->fd < 0) + if (!bpf_map__is_struct_ops(map) || !map_is_created(map)) return -EINVAL; st_ops_link = container_of(link, struct bpf_link_struct_ops, link); @@ -13304,7 +13594,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); - int prot, map_fd = bpf_map__fd(map); + int prot, map_fd = map->fd; void **mmaped = s->maps[i].mmaped; if (!mmaped) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5dc..27e4e320e1a69 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -555,6 +555,20 @@ static inline int ensure_good_fd(int fd) return fd; } +/* Point *fixed_fd* to the same file that *tmp_fd* points to. + * Regardless of success, *tmp_fd* is closed. + * Whatever *fixed_fd* pointed to is closed silently. + */ +static inline int reuse_fd(int fixed_fd, int tmp_fd) +{ + int err; + + err = dup2(tmp_fd, fixed_fd); + err = err < 0 ? -errno : 0; + close(tmp_fd); /* clean up temporary FD */ + return err; +} + /* The following two functions are exposed to bpftool */ int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t local_essent_len, diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 617ae55c3bb5b..fd15017ed3b12 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -383,6 +383,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) +# TODO: enable me -Wsign-compare CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 1386baf9ae4ad..f44875f8b3678 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -254,173 +254,97 @@ extern void bpf_throw(u64 cookie) __ksym; } \ }) -/* Description - * Assert that a conditional expression is true. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert(cond) if (!(cond)) bpf_throw(0); - -/* Description - * Assert that a conditional expression is true. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); - -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_eq(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, 0, true); \ - }) - -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_eq_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, value, true); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_lt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_lt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, value, false); \ - }) +#define __cmp_cannot_be_signed(x) \ + __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \ + __builtin_strcmp(#x, "&") == 0 -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_gt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, 0, false); \ - }) +#define __is_signed_type(type) (((type)(-1)) < (type)1) -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. +#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \ + ({ \ + __label__ l_true; \ + bool ret = DEFAULT; \ + asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \ + :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ + ret = !DEFAULT; \ +l_true: \ + ret; \ + }) + +/* C type conversions coupled with comparison operator are tricky. + * Make sure BPF program is compiled with -Wsign-compare then + * __lhs OP __rhs below will catch the mistake. + * Be aware that we check only __lhs to figure out the sign of compare. */ -#define bpf_assert_gt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, value, false); \ - }) +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \ + ({ \ + typeof(LHS) __lhs = (LHS); \ + typeof(RHS) __rhs = (RHS); \ + bool ret; \ + _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ + (void)(__lhs OP __rhs); \ + if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \ + } else { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \ + } \ + ret; \ + }) + +#ifndef bpf_cmp_unlikely +#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true) +#endif -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_le(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, 0, false); \ - }) +#ifndef bpf_cmp_likely +#define bpf_cmp_likely(LHS, OP, RHS) \ + ({ \ + bool ret; \ + if (__builtin_strcmp(#OP, "==") == 0) \ + ret = _bpf_cmp(LHS, !=, RHS, false); \ + else if (__builtin_strcmp(#OP, "!=") == 0) \ + ret = _bpf_cmp(LHS, ==, RHS, false); \ + else if (__builtin_strcmp(#OP, "<=") == 0) \ + ret = _bpf_cmp(LHS, >, RHS, false); \ + else if (__builtin_strcmp(#OP, "<") == 0) \ + ret = _bpf_cmp(LHS, >=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">") == 0) \ + ret = _bpf_cmp(LHS, <=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">=") == 0) \ + ret = _bpf_cmp(LHS, <, RHS, false); \ + else \ + (void) "bug"; \ + ret; \ + }) +#endif -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_le_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, value, false); \ - }) +#ifndef bpf_nop_mov +#define bpf_nop_mov(var) \ + asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) +#endif /* Description - * Assert that LHS is greater than or equal to RHS. This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. + * Assert that a conditional expression is true. * Returns * Void. * Throws * An exception with the value zero when the assertion fails. */ -#define bpf_assert_ge(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, 0, false); \ - }) +#define bpf_assert(cond) if (!(cond)) bpf_throw(0); /* Description - * Assert that LHS is greater than or equal to RHS. This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. + * Assert that a conditional expression is true. * Returns * Void. * Throws * An exception with the specified value when the assertion fails. */ -#define bpf_assert_ge_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, value, false); \ - }) +#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); /* Description * Assert that LHS is in the range [BEG, END] (inclusive of both). This diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 29c8635c57220..3720b7611523b 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -11,7 +11,6 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_BONDING=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y CONFIG_BPF_PRELOAD_UMD=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index e933303828494..706931a8c2c69 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -9,7 +9,6 @@ CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_FREEZER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b946088017f1d..5680befae8c6d 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -19,7 +19,6 @@ CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_BSD_DISKLABEL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_CFS_BANDWIDTH=y diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c new file mode 100644 index 0000000000000..8100509e561b2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include "fentry_recursive.skel.h" +#include "fentry_recursive_target.skel.h" +#include +#include "bpf/libbpf_internal.h" + +/* Test recursive attachment of tracing progs with more than one nesting level + * is not possible. Create a chain of attachment, verify that the last prog + * will fail. Depending on the arguments, following cases are tested: + * + * - Recursive loading of tracing progs, without attaching (attach = false, + * detach = false). The chain looks like this: + * load target + * load fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach of tracing progs (attach = true, detach = false). The + * chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach and detach of tracing progs (attach = true, detach = + * true). This validates that attach_tracing_prog flag will be set throughout + * the whole lifecycle of an fentry prog, independently from whether it's + * detached. The chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * detach fentry1 + * load fentry2 -> fentry1 (fail) + */ +static void test_recursive_fentry_chain(bool attach, bool detach) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_chain[2] = {}; + struct bpf_program *prog; + int prev_fd, err; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + return; + + /* Create an attachment chain with two fentry progs */ + for (int i = 0; i < 2; i++) { + tracing_chain[i] = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_chain[i], "fentry_recursive__open")) + goto close_prog; + + /* The first prog in the chain is going to be attached to the target + * fentry program, the second one to the previous in the chain. + */ + prog = tracing_chain[i]->progs.recursive_attach; + if (i == 0) { + prev_fd = bpf_program__fd(target_skel->progs.test1); + err = bpf_program__set_attach_target(prog, prev_fd, "test1"); + } else { + prev_fd = bpf_program__fd(tracing_chain[i-1]->progs.recursive_attach); + err = bpf_program__set_attach_target(prog, prev_fd, "recursive_attach"); + } + + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_chain[i]); + /* The first attach should succeed, the second fail */ + if (i == 0) { + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + if (attach) { + err = fentry_recursive__attach(tracing_chain[i]); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto close_prog; + } + + if (detach) { + /* Flag attach_tracing_prog should still be set, preventing + * attachment of the following prog. + */ + fentry_recursive__detach(tracing_chain[i]); + } + } else { + if (!ASSERT_ERR(err, "fentry_recursive__load")) + goto close_prog; + } + } + +close_prog: + fentry_recursive_target__destroy(target_skel); + for (int i = 0; i < 2; i++) { + fentry_recursive__destroy(tracing_chain[i]); + } +} + +void test_recursive_fentry(void) +{ + if (test__start_subtest("attach")) + test_recursive_fentry_chain(true, false); + if (test__start_subtest("load")) + test_recursive_fentry_chain(false, false); + if (test__start_subtest("detach")) + test_recursive_fentry_chain(true, true); +} + +/* Test that a tracing prog reattachment (when we land in + * "prog->aux->dst_trampoline and tgt_prog is NULL" branch in + * bpf_tracing_prog_attach) does not lead to a crash due to missing attach_btf + */ +void test_fentry_attach_btf_presence(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, link_fd, tgt_prog_fd; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + goto close_prog; + + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto close_prog; + + prog = tracing_skel->progs.recursive_attach; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = bpf_program__set_attach_target(prog, tgt_prog_fd, "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + tgt_prog_fd = bpf_program__fd(tracing_skel->progs.recursive_attach); + link_fd = bpf_link_create(tgt_prog_fd, 0, BPF_TRACE_FENTRY, NULL); + if (!ASSERT_GE(link_fd, 0, "link_fd")) + goto close_prog; + + fentry_recursive__detach(tracing_skel); + + err = fentry_recursive__attach(tracing_skel); + ASSERT_ERR(err, "fentry_recursive__attach"); + +close_prog: + fentry_recursive_target__destroy(target_skel); + fentry_recursive__destroy(tracing_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 7c2241fae19a6..77e26ecffa9d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -555,6 +555,213 @@ static void test_sockmap_unconnected_unix(void) close(dgram); } +static void test_sockmap_many_socket(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map, &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + +static void test_sockmap_many_maps(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map[2], entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map[0] = bpf_map__fd(skel->maps.sock_map_rx); + map[1] = bpf_map__fd(skel->maps.sock_map_tx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map[1], &entry); + entry--; + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + err = bpf_map_delete_elem(map[0], &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + +static void test_sockmap_same_sock(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + + err = bpf_map_delete_elem(map, &zero); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -597,7 +804,12 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); - if (test__start_subtest("sockmap unconnected af_unix")) test_sockmap_unconnected_unix(); + if (test__start_subtest("sockmap one socket to many map entries")) + test_sockmap_many_socket(); + if (test__start_subtest("sockmap one socket to many maps")) + test_sockmap_many_maps(); + if (test__start_subtest("sockmap same socket replace")) + test_sockmap_same_sock(); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c index d3491a84b3b98..ccae0b31ac6c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -14,7 +14,8 @@ static void do_bpf_ma_test(const char *name) struct test_bpf_ma *skel; struct bpf_program *prog; struct btf *btf; - int i, err; + int i, err, id; + char tname[32]; skel = test_bpf_ma__open(); if (!ASSERT_OK_PTR(skel, "open")) @@ -25,16 +26,21 @@ static void do_bpf_ma_test(const char *name) goto out; for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { - char name[32]; - int id; - - snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); - id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); - if (!ASSERT_GT(id, 0, "bin_data")) + snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) goto out; skel->rodata->data_btf_ids[i] = id; } + for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) { + snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) + goto out; + skel->rodata->percpu_data_btf_ids[i] = id; + } + prog = bpf_object__find_program_by_name(skel->obj, name); if (!ASSERT_OK_PTR(prog, "invalid prog name")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index e0879df38639b..67d4ef9e62b37 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -20,6 +20,109 @@ #include "test_global_func17.skel.h" #include "test_global_func_ctx_args.skel.h" +#include "bpf/libbpf_internal.h" +#include "btf_helpers.h" + +static void check_ctx_arg_type(const struct btf *btf, const struct btf_param *p) +{ + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, p->type); + if (!ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_t")) + return; + + s = btf_type_raw_dump(btf, t->type); + if (!ASSERT_HAS_SUBSTR(s, "STRUCT 'bpf_perf_event_data' size=0 vlen=0", + "ctx_struct_t")) + return; +} + +static void subtest_ctx_arg_rewrite(void) +{ + struct test_global_func_ctx_args *skel = NULL; + struct bpf_prog_info info; + char func_info_buf[1024] __attribute__((aligned(8))); + struct bpf_func_info_min *rec; + struct btf *btf = NULL; + __u32 info_len = sizeof(info); + int err, fd, i; + + skel = test_global_func_ctx_args__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.arg_tag_ctx_perf, true); + + err = test_global_func_ctx_args__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info_buf); + info.nr_func_info = 3; + info.func_info_rec_size = sizeof(struct bpf_func_info_min); + + fd = bpf_program__fd(skel->progs.arg_tag_ctx_perf); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_info")) + goto out; + + if (!ASSERT_EQ(info.nr_func_info, 3, "nr_func_info")) + goto out; + + btf = btf__load_from_kernel_by_id(info.btf_id); + if (!ASSERT_OK_PTR(btf, "obj_kern_btf")) + goto out; + + rec = (struct bpf_func_info_min *)func_info_buf; + for (i = 0; i < info.nr_func_info; i++, rec = (void *)rec + info.func_info_rec_size) { + const struct btf_type *fn_t, *proto_t; + const char *name; + + if (rec->insn_off == 0) + continue; /* main prog, skip */ + + fn_t = btf__type_by_id(btf, rec->type_id); + if (!ASSERT_OK_PTR(fn_t, "fn_type")) + goto out; + if (!ASSERT_EQ(btf_kind(fn_t), BTF_KIND_FUNC, "fn_type_kind")) + goto out; + proto_t = btf__type_by_id(btf, fn_t->type); + if (!ASSERT_OK_PTR(proto_t, "proto_type")) + goto out; + + name = btf__name_by_offset(btf, fn_t->name_off); + if (strcmp(name, "subprog_ctx_tag") == 0) { + /* int subprog_ctx_tag(void *ctx __arg_ctx) */ + if (!ASSERT_EQ(btf_vlen(proto_t), 1, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + } else if (strcmp(name, "subprog_multi_ctx_tags") == 0) { + /* int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + * struct my_struct *mem, + * void *ctx2 __arg_ctx) + */ + if (!ASSERT_EQ(btf_vlen(proto_t), 3, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + /* arg 2 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[2]); + } else { + ASSERT_FAIL("unexpected subprog %s", name); + goto out; + } + } + +out: + btf__free(btf); + test_global_func_ctx_args__destroy(skel); +} + void test_test_global_funcs(void) { RUN_TESTS(test_global_func1); @@ -40,4 +143,7 @@ void test_test_global_funcs(void) RUN_TESTS(test_global_func16); RUN_TESTS(test_global_func17); RUN_TESTS(test_global_func_ctx_args); + + if (test__start_subtest("ctx_arg_rewrite")) + subtest_ctx_arg_rewrite(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c index feaaa2b89c574..5014a17d6c02f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -20,7 +20,7 @@ struct { } hashmap1 SEC(".maps"); /* will set before prog run */ -volatile const __u32 num_cpus = 0; +volatile const __s32 num_cpus = 0; /* will collect results during prog run */ __u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637d5c..423b39e60b6f1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -35,7 +35,7 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) return 0; file = vma->vm_file; - if (task->tgid != pid) { + if (task->tgid != (pid_t)pid) { if (one_task) one_task_error = 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 96131b9a1caae..6cbb3393f2438 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -22,7 +22,7 @@ int dump_task(struct bpf_iter__task *ctx) return 0; } - if (task->pid != tid) + if (task->pid != (pid_t)tid) num_unknown_tid++; else num_known_tid++; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 400fdf8d62332..dbf61c44acac6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -45,7 +45,7 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx) } /* fill seq_file buffer */ - for (i = 0; i < print_len; i++) + for (i = 0; i < (int)print_len; i++) bpf_seq_write(seq, &seq_num, sizeof(seq_num)); return ret; diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c index b7fa8804e19de..45a0e9f492a97 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -11,7 +11,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/setsockopt") int get_retval(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index facedd8b82500..5e282c16eadc5 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -15,7 +15,7 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 target_pid; +__s32 target_pid; __u64 cgroup_id; int target_hid; bool is_cgroup1; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fc3666edf4561..7a1e64c6c065c 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -332,7 +332,7 @@ SEC("tp_btf/task_newtask") int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; - u32 cpu; + int cpu; if (!is_test_task()) return 0; diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c index 2811ee842b01e..f09cd14d8e040 100644 --- a/tools/testing/selftests/bpf/progs/exceptions.c +++ b/tools/testing/selftests/bpf/progs/exceptions.c @@ -210,7 +210,7 @@ __noinline int assert_zero_gfunc(u64 c) { volatile u64 cookie = c; - bpf_assert_eq(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, ==, 0)); return 0; } @@ -218,7 +218,7 @@ __noinline int assert_neg_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_lt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, <, 0)); return 0; } @@ -226,7 +226,7 @@ __noinline int assert_pos_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_gt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, >, 0)); return 0; } @@ -234,7 +234,7 @@ __noinline int assert_negeq_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_le(cookie, -1); + bpf_assert(bpf_cmp_unlikely(cookie, <=, -1)); return 0; } @@ -242,7 +242,7 @@ __noinline int assert_poseq_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_ge(cookie, 1); + bpf_assert(bpf_cmp_unlikely(cookie, >=, 1)); return 0; } @@ -258,7 +258,7 @@ __noinline int assert_zero_gfunc_with(u64 c) { volatile u64 cookie = c; - bpf_assert_eq_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100); return 0; } @@ -266,7 +266,7 @@ __noinline int assert_neg_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_lt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100); return 0; } @@ -274,7 +274,7 @@ __noinline int assert_pos_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_gt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100); return 0; } @@ -282,7 +282,7 @@ __noinline int assert_negeq_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_le_with(cookie, -1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100); return 0; } @@ -290,7 +290,7 @@ __noinline int assert_poseq_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_ge_with(cookie, 1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 0ef81040da596..5e0a1ca96d4e2 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -11,51 +11,51 @@ #define check_assert(type, op, name, value) \ SEC("?tc") \ __log_level(2) __failure \ - int check_assert_##op##_##name(void *ctx) \ + int check_assert_##name(void *ctx) \ { \ type num = bpf_ktime_get_ns(); \ - bpf_assert_##op(num, value); \ + bpf_assert(bpf_cmp_unlikely(num, op, value)); \ return *(u64 *)num; \ } -__msg(": R0_w=0xffffffff80000000 R10=fp0") -check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=0x7fffffff R10=fp0") -check_assert(s64, eq, int_max, INT_MAX); -__msg(": R0_w=0 R10=fp0") -check_assert(s64, eq, zero, 0); -__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") -check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") -check_assert(s64, eq, llong_max, LLONG_MAX); - -__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") -check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, neg, INT_MIN); - -__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") -check_assert(s64, le, pos, INT_MAX); -__msg(": R0_w=scalar(smax=0) R10=fp0") -check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, le, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") -check_assert(s64, gt, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") -check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") -check_assert(s64, ge, neg, INT_MIN); +__msg(": R0_w=0xffffffff80000000") +check_assert(s64, ==, eq_int_min, INT_MIN); +__msg(": R0_w=0x7fffffff") +check_assert(s64, ==, eq_int_max, INT_MAX); +__msg(": R0_w=0") +check_assert(s64, ==, eq_zero, 0); +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000") +check_assert(s64, ==, eq_llong_min, LLONG_MIN); +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff") +check_assert(s64, ==, eq_llong_max, LLONG_MAX); + +__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)") +check_assert(s64, <, lt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, <, lt_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff") +check_assert(s64, <, lt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smax=0x7fffffff)") +check_assert(s64, <=, le_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=0)") +check_assert(s64, <=, le_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000") +check_assert(s64, <=, le_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001") +check_assert(s64, >, gt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000") +check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") __log_level(2) __failure diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive.c b/tools/testing/selftests/bpf/progs/fentry_recursive.c new file mode 100644 index 0000000000000..2c9fb5ac42b23 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains */ +SEC("fentry/XXX") +int BPF_PROG(recursive_attach, int a) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c new file mode 100644 index 0000000000000..267c876d0aba5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains. It's going to be + * a start of the chain. + */ +SEC("fentry/bpf_testmod_fentry_test1") +int BPF_PROG(test1, int a) +{ + return 0; +} + +/* Dummy bpf prog for testing attach_btf presence when attaching an fentry + * program. + */ +SEC("raw_tp/sys_enter") +int BPF_PROG(fentry_target, struct pt_regs *regs, long id) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 3aca3dc145b55..fe971992e6359 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -6,7 +6,7 @@ #include #include "bpf_misc.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) static volatile int zero = 0; @@ -676,7 +676,7 @@ static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= n) + if ((__u32)i >= n) break; sum += arr[i]; } diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c index e085a51d153ee..dc0c3691dcc2b 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_vma.c +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -28,9 +28,8 @@ int iter_task_vma_for_each(const void *ctx) return 0; bpf_for_each(task_vma, vma, task, 0) { - if (seen >= 1000) + if (bpf_cmp_unlikely(seen, >=, 1000)) break; - barrier_var(seen); vm_ranges[seen].vm_start = vma->vm_start; vm_ranges[seen].vm_end = vma->vm_end; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index c4b49ceea967b..cc79dddac182c 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -8,7 +8,7 @@ #include "bpf_misc.h" /* weak and shared between two files */ -const volatile int my_tid __weak; +const volatile __u32 my_tid __weak; long syscall_id __weak; int output_val1; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 013ff0645f0cc..942cc5526ddf0 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -68,7 +68,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) { static volatile int whatever; - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + if (my_tid != (s32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; /* make sure we have CO-RE relocations in main program */ diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 84d1777a9e6c9..26205ca806796 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" #ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) #endif #include "linked_list.h" diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index bc8ea56671a16..e5e3a8b8dd075 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -13,7 +13,7 @@ char _license[] SEC("license") = "GPL"; #define DUMMY_STORAGE_VALUE 0xdeadbeef -int monitored_pid = 0; +__u32 monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; int task_storage_result = -1; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index fadfdd98707c4..0c13b7409947e 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -92,7 +92,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, if (ret != 0) return ret; - __u32 pid = bpf_get_current_pid_tgid() >> 32; + __s32 pid = bpf_get_current_pid_tgid() >> 32; int is_stack = 0; is_stack = (vma->vm_start <= vma->vm_mm->start_stack && diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c index 66cde82aa86d9..a45c9299552c9 100644 --- a/tools/testing/selftests/bpf/progs/normal_map_btf.c +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -36,7 +36,7 @@ int add_to_list_in_array(void *ctx) struct node_data *new; int zero = 0; - if (done || (u32)bpf_get_current_pid_tgid() != pid) + if (done || (int)bpf_get_current_pid_tgid() != pid) return 0; value = bpf_map_lookup_elem(&array, &zero); diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c index 1a891d30f1fe0..f2b8eb2ff76f7 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -17,6 +17,10 @@ struct val_with_rb_root_t { struct bpf_spin_lock lock; }; +struct val_600b_t { + char b[600]; +}; + struct elem { long sum; struct val_t __percpu_kptr *pc; @@ -161,4 +165,18 @@ int BPF_PROG(test_array_map_7) return 0; } +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512") +int BPF_PROG(test_array_map_8) +{ + struct val_600b_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_600b_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 897061930cb76..de3b6e4e4d0a7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -7,6 +7,7 @@ #include "profiler.h" #include "err.h" +#include "bpf_experimental.h" #ifndef NULL #define NULL 0 @@ -132,7 +133,7 @@ struct { } disallowed_exec_inodes SEC(".maps"); #ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0])) #endif static INLINE bool IS_ERR(const void* ptr) @@ -221,8 +222,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, return payload; if (cgroup_node == cgroup_root_node) *root_pos = payload - payload_start; - if (filepart_length <= MAX_PATH) { - barrier_var(filepart_length); + if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } cgroup_node = BPF_CORE_READ(cgroup_node, parent); @@ -305,9 +305,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_root_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); - barrier_var(cgroup_root_length); - if (cgroup_root_length <= MAX_PATH) { - barrier_var(cgroup_root_length); + if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) { cgroup_data->cgroup_root_length = cgroup_root_length; payload += cgroup_root_length; } @@ -315,9 +313,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_proc_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= MAX_PATH) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) { cgroup_data->cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -347,9 +343,7 @@ static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, metadata->comm_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { metadata->comm_length = comm_length; payload += comm_length; } @@ -494,10 +488,9 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) filepart_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp_dentry, d_name.name)); - barrier_var(filepart_length); - if (filepart_length > MAX_PATH) + bpf_nop_mov(filepart_length); + if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH)) break; - barrier_var(filepart_length); payload += filepart_length; length += filepart_length; @@ -579,9 +572,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, CTL_MAXNAME, buf); - barrier_var(sysctl_val_length); - if (sysctl_val_length <= CTL_MAXNAME) { - barrier_var(sysctl_val_length); + if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) { sysctl_data->sysctl_val_length = sysctl_val_length; payload += sysctl_val_length; } @@ -590,9 +581,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp, f_path.dentry, d_name.name)); - barrier_var(sysctl_path_length); - if (sysctl_path_length <= MAX_PATH) { - barrier_var(sysctl_path_length); + if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) { sysctl_data->sysctl_path_length = sysctl_path_length; payload += sysctl_path_length; } @@ -645,7 +634,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; - if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) { bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), past_kill_data); void* payload = kill_data->payload; @@ -658,9 +647,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) kill_data->kill_target_cgroup_proc_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { kill_data->kill_target_name_length = comm_length; payload += comm_length; } @@ -669,9 +656,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) bpf_probe_read_kernel_str(payload, KILL_TARGET_LEN, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= KILL_TARGET_LEN) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) { kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -731,9 +716,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) const char* filename = BPF_CORE_READ(bprm, filename); size_t bin_path_length = bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); - barrier_var(bin_path_length); - if (bin_path_length <= MAX_FILENAME_LEN) { - barrier_var(bin_path_length); + if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) { proc_exec_data->bin_path_length = bin_path_length; payload += bin_path_length; } @@ -743,8 +726,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) unsigned int cmdline_length = probe_read_lim(payload, arg_start, arg_end - arg_start, MAX_ARGS_LEN); - if (cmdline_length <= MAX_ARGS_LEN) { - barrier_var(cmdline_length); + if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) { proc_exec_data->cmdline_length = cmdline_length; payload += cmdline_length; } @@ -821,9 +803,7 @@ int kprobe_ret__do_filp_open(struct pt_regs* ctx) payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -876,17 +856,13 @@ int BPF_KPROBE(kprobe__vfs_link, payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(new_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -936,16 +912,12 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, oldname); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c8f59caa46394..a3434b8409281 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 -__u32 page_size = 0; +__s32 page_size = 0; struct sockopt_inherit { __u8 val; diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 96f29fce050bc..db67278e12d4d 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -5,7 +5,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index dbe235ede7f38..83753b00a5569 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index 069db9085e789..3494ca30fa7fe 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -20,17 +20,20 @@ char _license[] SEC("license") = "GPL"; const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; +const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512}; +const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + int err = 0; -int pid = 0; +u32 pid = 0; #define DEFINE_ARRAY_WITH_KPTR(_size) \ struct bin_data_##_size { \ char data[_size - sizeof(void *)]; \ }; \ + /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \ + struct bin_data_##_size *__bin_data_##_size; \ struct map_value_##_size { \ struct bin_data_##_size __kptr * data; \ - /* To emit BTF info for bin_data_xx */ \ - struct bin_data_##_size not_used; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -40,8 +43,12 @@ int pid = 0; } array_##_size SEC(".maps") #define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ + struct percpu_bin_data_##_size { \ + char data[_size]; \ + }; \ + struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \ struct map_value_percpu_##_size { \ - struct bin_data_##_size __percpu_kptr * data; \ + struct percpu_bin_data_##_size __percpu_kptr * data; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -114,7 +121,7 @@ static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int return; } /* per-cpu allocator may not be able to refill in time */ - new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); + new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL); if (!new) continue; @@ -179,7 +186,7 @@ DEFINE_ARRAY_WITH_KPTR(1024); DEFINE_ARRAY_WITH_KPTR(2048); DEFINE_ARRAY_WITH_KPTR(4096); -/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ +DEFINE_ARRAY_WITH_PERCPU_KPTR(8); DEFINE_ARRAY_WITH_PERCPU_KPTR(16); DEFINE_ARRAY_WITH_PERCPU_KPTR(32); DEFINE_ARRAY_WITH_PERCPU_KPTR(64); @@ -188,9 +195,6 @@ DEFINE_ARRAY_WITH_PERCPU_KPTR(128); DEFINE_ARRAY_WITH_PERCPU_KPTR(192); DEFINE_ARRAY_WITH_PERCPU_KPTR(256); DEFINE_ARRAY_WITH_PERCPU_KPTR(512); -DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); -DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); -DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); SEC("?fentry/" SYS_PREFIX "sys_nanosleep") int test_batch_alloc_free(void *ctx) @@ -246,20 +250,18 @@ int test_batch_percpu_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, - * then free 128 16-bytes per-cpu objects in batch to trigger freeing. + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, + * then free 128 8-bytes per-cpu objects in batch to trigger freeing. */ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); return 0; } @@ -270,20 +272,18 @@ int test_percpu_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. */ - CALL_BATCH_PERCPU_ALLOC(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC(512, 64, 8); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index a17dd83eae674..ee4a601dcb066 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -53,7 +53,7 @@ int test_core_kernel(void *ctx) struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); - uint32_t real_tgid = (uint32_t)pid_tgid; + int32_t real_tgid = (int32_t)pid_tgid; int pid, tgid; if (data.my_pid_tgid != pid_tgid) diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index f59f175c7bafb..bcb31ff92dccc 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -43,8 +43,8 @@ int BPF_PROG(test_core_module_probed, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; @@ -77,8 +77,8 @@ int BPF_PROG(test_core_module_direct, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c index 3975495b75c8d..9e0f73e8189ce 100644 --- a/tools/testing/selftests/bpf/progs/test_fsverity.c +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -38,7 +38,7 @@ int BPF_PROG(test_file_open, struct file *f) return 0; got_fsverity = 1; - for (i = 0; i < sizeof(digest); i++) { + for (i = 0; i < (int)sizeof(digest); i++) { if (digest[i] != expected_digest[i]) return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c index 7faa8eef0598b..9a06e5eb1fbef 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -102,3 +102,52 @@ int perf_event_ctx(void *ctx) { return perf_event_ctx_subprog(ctx); } + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +struct my_struct { int x; }; + +__weak int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + struct my_struct *mem, + void *ctx2 __arg_ctx) +{ + if (!mem) + return 0; + + return bpf_get_stack(ctx1, stack, sizeof(stack), 0) + + mem->x + + bpf_get_stack(ctx2, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c index eacda9fe07ebb..4cfa42aa94361 100644 --- a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -29,7 +29,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog) len = unix_sk->addr->len - sizeof(short); path[0] = '@'; for (i = 1; i < len; i++) { - if (i >= sizeof(struct sockaddr_un)) + if (i >= (int)sizeof(struct sockaddr_un)) break; path[i] = unix_sk->addr->name->sun_path[i]; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 5baaafed0d2de..3abf068b84464 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -38,7 +38,7 @@ int xdp_redirect(struct xdp_md *xdp) if (payload + 1 > data_end) return XDP_ABORTED; - if (xdp->ingress_ifindex != ifindex_in) + if (xdp->ingress_ifindex != (__u32)ifindex_in) return XDP_ABORTED; if (metadata + 1 > data) diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index d1edbcff9a185..05a329ee45ee9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -33,6 +33,25 @@ l3_%=: \ : __clobber_all); } +SEC("socket") +__description("gotol, large_imm") +__success __failure_unpriv __retval(40000) +__naked void gotol_large_imm(void) +{ + asm volatile (" \ + gotol 1f; \ +0: \ + r0 = 0; \ + .rept 40000; \ + r0 += 1; \ + .endr; \ + exit; \ +1: gotol 0b; \ +" : + : + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c index 692216c0ad3d4..886498b5e6f34 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c @@ -89,9 +89,14 @@ l0_%=: exit; \ : __clobber_all); } +/* Call a function taking a pointer and a size which doesn't allow the size to + * be zero (i.e. bpf_trace_printk() declares the second argument to be + * ARG_CONST_SIZE, not ARG_CONST_SIZE_OR_ZERO). We attempt to pass zero for the + * size and expect to fail. + */ SEC("tracepoint") __description("helper access to map: empty range") -__failure __msg("invalid access to map value, value_size=48 off=0 size=0") +__failure __msg("R2 invalid zero-sized read: u64=[0,0]") __naked void access_to_map_empty_range(void) { asm volatile (" \ @@ -113,6 +118,38 @@ l0_%=: exit; \ : __clobber_all); } +/* Like the test above, but this time the size register is not known to be zero; + * its lower-bound is zero though, which is still unacceptable. + */ +SEC("tracepoint") +__description("helper access to map: possibly-empty ange") +__failure __msg("R2 invalid zero-sized read: u64=[0,4]") +__naked void access_to_map_possibly_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + /* Read an unknown value */ \ + r7 = *(u64*)(r0 + 0); \ + /* Make it small and positive, to avoid other errors */ \ + r7 &= 4; \ + r2 = 0; \ + r2 += r7; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b) + : __clobber_all); +} + SEC("tracepoint") __description("helper access to map: out-of-bound range") __failure __msg("invalid access to map value, value_size=48 off=0 size=56") @@ -221,7 +258,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const imm): empty range") -__failure __msg("invalid access to map value, value_size=48 off=4 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_imm_empty_range(void) { asm volatile (" \ @@ -386,7 +423,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const reg): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_reg_empty_range(void) { asm volatile (" \ @@ -556,7 +593,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via variable): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void map_via_variable_empty_range(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index f67390224a9cf..7cc83acac7271 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -64,7 +64,7 @@ __naked void load_bytes_negative_len_2(void) SEC("tc") __description("raw_stack: skb_load_bytes, zero len") -__failure __msg("invalid zero-sized read") +__failure __msg("R4 invalid zero-sized read: u64=[0,0]") __naked void skb_load_bytes_zero_len(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 74ceb7877ae22..f01391021218d 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -12,7 +12,7 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) -#define TEST_LOADER_LOG_BUF_SZ 1048576 +#define TEST_LOADER_LOG_BUF_SZ 2097152 #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" diff --git a/tools/testing/selftests/hid/config b/tools/testing/selftests/hid/config index 4f425178b56fa..1758b055f2952 100644 --- a/tools/testing/selftests/hid/config +++ b/tools/testing/selftests/hid/config @@ -1,5 +1,4 @@ CONFIG_BPF_EVENTS=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT=y CONFIG_BPF_KPROBE_OVERRIDE=y