diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 909fc033173a7..da8c64ca8dc99 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -35,6 +35,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); + u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); }; struct bpf_map { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a13b031dc6b80..5efb4db44e1ef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,7 +66,10 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/include/linux/filter.h b/include/linux/filter.h index fbf7b39e81035..dffa072b7b79e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -693,6 +693,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool ebpf_jit_enabled(void) +{ + return bpf_jit_enable && bpf_jit_is_ebpf(); +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return fp->jited && bpf_jit_is_ebpf(); @@ -753,6 +758,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp); #else /* CONFIG_BPF_JIT */ +static inline bool ebpf_jit_enabled(void) +{ + return false; +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 6b6f41f0b2116..bcf9955fac956 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016,2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -113,6 +114,33 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) return array->value + array->elem_size * index; } +/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ +static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_insn *insn = insn_buf; + u32 elem_size = array->elem_size; + const int ret = BPF_REG_0; + const int map_ptr = BPF_REG_1; + const int index = BPF_REG_2; + + *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); + *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, array->map.max_entries, + elem_size == 1 ? 2 : 3); + if (elem_size == 1) { + /* nop */ + } else if (is_power_of_2(elem_size)) { + *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); + } else { + *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); + } + *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *insn++ = BPF_MOV64_IMM(ret, 0); + return insn - insn_buf; +} + /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -267,6 +295,7 @@ static const struct bpf_map_ops array_ops = { .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_gen_lookup = array_map_gen_lookup, }; static struct bpf_map_type_list array_type __ro_after_init = { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index afe5bab376c98..000153acb6d57 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -419,7 +419,11 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, return NULL; } -/* Called from syscall or from eBPF program */ +/* Called from syscall or from eBPF program directly, so + * arguments have to match bpf_map_lookup_elem() exactly. + * The return value is adjusted by BPF instructions + * in htab_map_gen_lookup(). + */ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); @@ -451,6 +455,30 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +/* inline bpf_map_lookup_elem() call. + * Instead of: + * bpf_prog + * bpf_map_lookup_elem + * map->ops->map_lookup_elem + * htab_map_lookup_elem + * __htab_map_lookup_elem + * do: + * bpf_prog + * __htab_map_lookup_elem + */ +static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + const int ret = BPF_REG_0; + + *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); + *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); + return insn - insn_buf; +} + static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -1062,6 +1090,7 @@ static const struct bpf_map_ops htab_ops = { .map_lookup_elem = htab_map_lookup_elem, .map_update_elem = htab_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_gen_lookup = htab_map_gen_lookup, }; static struct bpf_map_type_list htab_type __ro_after_init = { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7af0dcc5d7555..48c914b983bd1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -586,59 +586,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) list_add(&tl->list_node, &bpf_prog_types); } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... - * - * this function is called after eBPF program passed verification - */ -static void fixup_bpf_calls(struct bpf_prog *prog) -{ - const struct bpf_func_proto *fn; - int i; - - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; - - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_xdp_adjust_head) - prog->xdp_adjust_head = 1; - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; - } - } -} - /* drop refcnt on maps used by eBPF program and free auxilary data */ static void free_used_maps(struct bpf_prog_aux *aux) { @@ -892,9 +839,6 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* fixup BPF_CALL->imm field */ - fixup_bpf_calls(prog); - /* eBPF program is ready to be JITed */ prog = bpf_prog_select_runtime(prog, &err); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 796b68d001198..90bf46787603a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1273,7 +1273,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } } -static int check_call(struct bpf_verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; @@ -1369,6 +1369,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; } else { verbose("unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -2940,7 +2941,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm); + err = check_call(env, insn->imm, insn_idx); if (err) return err; @@ -3162,6 +3163,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) insn->src_reg = 0; } +/* single env->prog->insni[off] instruction was replaced with the range + * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying + * [0, off) and [off, end) to new locations, so the patched range stays zero + */ +static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, + u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + + if (cnt == 1) + return 0; + new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); + if (!new_data) + return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); + memcpy(new_data + off + cnt - 1, old_data + off, + sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + env->insn_aux_data = new_data; + vfree(old_data); + return 0; +} + +static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + struct bpf_prog *new_prog; + + new_prog = bpf_patch_insn_single(env->prog, off, patch, len); + if (!new_prog) + return NULL; + if (adjust_insn_aux_data(env, new_prog->len, off, len)) + return NULL; + return new_prog; +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -3181,10 +3217,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) verbose("bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { - new_prog = bpf_patch_insn_single(env->prog, 0, - insn_buf, cnt); + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); if (!new_prog) return -ENOMEM; + env->prog = new_prog; delta += cnt - 1; } @@ -3209,7 +3245,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; - if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); @@ -3218,8 +3254,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, - cnt); + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@ -3233,6 +3268,83 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } +/* fixup insn->imm field of bpf_call instructions + * and inline eligible helpers as explicit sequence of BPF instructions + * + * this function is called after eBPF program passed verification + */ +static int fixup_bpf_calls(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; + const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_xdp_adjust_head) + prog->xdp_adjust_head = 1; + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode to avoid + * conditional branch in the interpeter for every normal + * call and to prevent accidental JITing by JIT compiler + * that doesn't support bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + + if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) { + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (!map_ptr->ops->map_gen_lookup) + goto patch_call_imm; + + cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, + cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + + /* keep walking new program and skip insns we just inserted */ + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + +patch_call_imm: + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + if (!fn->func) { + verbose("kernel subsystem misconfigured func %s#%d\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } + insn->imm = fn->func - __bpf_call_base; + } + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -3328,6 +3440,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); + if (ret == 0) + ret = fixup_bpf_calls(env); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index a91872a97742a..9da2a3441b0a2 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + SEC("kprobe/sys_getuid") int stress_hmap(struct pt_regs *ctx) { @@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } +SEC("kprobe/sys_getpgid") +int stress_hash_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&hash_map, &key); + + return 0; +} + +SEC("kprobe/sys_getpgrp") +int stress_array_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&array_map, &key); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 680260a91f50c..e29ff318a7936 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -38,6 +38,8 @@ static __u64 time_get_ns(void) #define LRU_HASH_PREALLOC (1 << 4) #define PERCPU_LRU_HASH_PREALLOC (1 << 5) #define LPM_KMALLOC (1 << 6) +#define HASH_LOOKUP (1 << 7) +#define ARRAY_LOOKUP (1 << 8) static int test_flags = ~0; @@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu) cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); } +static void test_hash_lookup(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getpgid, 0); + printf("%d:hash_lookup %lld lookups per sec\n", + cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time)); +} + +static void test_array_lookup(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getpgrp, 0); + printf("%d:array_lookup %lld lookups per sec\n", + cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time)); +} + static void loop(int cpu) { cpu_set_t cpuset; @@ -153,6 +179,12 @@ static void loop(int cpu) if (test_flags & LPM_KMALLOC) test_lpm_kmalloc(cpu); + + if (test_flags & HASH_LOOKUP) + test_hash_lookup(cpu); + + if (test_flags & ARRAY_LOOKUP) + test_array_lookup(cpu); } static void run_perf_test(int tasks)