From e245c5c6a5656e4d61aa7bb08e9694fd6e5b2b9d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:39 -0700
Subject: [PATCH 1/6] bpf: move fixup_bpf_calls() function

no functional change.
move fixup_bpf_calls() to verifier.c
it's being refactored in the next patch

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/bpf/syscall.c  | 56 ------------------------------------------
 kernel/bpf/verifier.c | 57 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d7555..48c914b983bd1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -586,59 +586,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
 	list_add(&tl->list_node, &bpf_prog_types);
 }
 
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- *      insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
-	const struct bpf_func_proto *fn;
-	int i;
-
-	for (i = 0; i < prog->len; i++) {
-		struct bpf_insn *insn = &prog->insnsi[i];
-
-		if (insn->code == (BPF_JMP | BPF_CALL)) {
-			/* we reach here when program has bpf_call instructions
-			 * and it passed bpf_check(), means that
-			 * ops->get_func_proto must have been supplied, check it
-			 */
-			BUG_ON(!prog->aux->ops->get_func_proto);
-
-			if (insn->imm == BPF_FUNC_get_route_realm)
-				prog->dst_needed = 1;
-			if (insn->imm == BPF_FUNC_get_prandom_u32)
-				bpf_user_rnd_init_once();
-			if (insn->imm == BPF_FUNC_xdp_adjust_head)
-				prog->xdp_adjust_head = 1;
-			if (insn->imm == BPF_FUNC_tail_call) {
-				/* mark bpf_tail_call as different opcode
-				 * to avoid conditional branch in
-				 * interpeter for every normal call
-				 * and to prevent accidental JITing by
-				 * JIT compiler that doesn't support
-				 * bpf_tail_call yet
-				 */
-				insn->imm = 0;
-				insn->code |= BPF_X;
-				continue;
-			}
-
-			fn = prog->aux->ops->get_func_proto(insn->imm);
-			/* all functions that have prototype and verifier allowed
-			 * programs to call them, must be real in-kernel functions
-			 */
-			BUG_ON(!fn->func);
-			insn->imm = fn->func - __bpf_call_base;
-		}
-	}
-}
-
 /* drop refcnt on maps used by eBPF program and free auxilary data */
 static void free_used_maps(struct bpf_prog_aux *aux)
 {
@@ -892,9 +839,6 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err < 0)
 		goto free_used_maps;
 
-	/* fixup BPF_CALL->imm field */
-	fixup_bpf_calls(prog);
-
 	/* eBPF program is ready to be JITed */
 	prog = bpf_prog_select_runtime(prog, &err);
 	if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 796b68d001198..e41da6c570530 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3233,6 +3233,60 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	return 0;
 }
 
+/* fixup insn->imm field of bpf_call instructions:
+ * if (insn->imm == BPF_FUNC_map_lookup_elem)
+ *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
+ * else if (insn->imm == BPF_FUNC_map_update_elem)
+ *      insn->imm = bpf_map_update_elem - __bpf_call_base;
+ * else ...
+ *
+ * this function is called after eBPF program passed verification
+ */
+static void fixup_bpf_calls(struct bpf_prog *prog)
+{
+	const struct bpf_func_proto *fn;
+	int i;
+
+	for (i = 0; i < prog->len; i++) {
+		struct bpf_insn *insn = &prog->insnsi[i];
+
+		if (insn->code == (BPF_JMP | BPF_CALL)) {
+			/* we reach here when program has bpf_call instructions
+			 * and it passed bpf_check(), means that
+			 * ops->get_func_proto must have been supplied, check it
+			 */
+			BUG_ON(!prog->aux->ops->get_func_proto);
+
+			if (insn->imm == BPF_FUNC_get_route_realm)
+				prog->dst_needed = 1;
+			if (insn->imm == BPF_FUNC_get_prandom_u32)
+				bpf_user_rnd_init_once();
+			if (insn->imm == BPF_FUNC_xdp_adjust_head)
+				prog->xdp_adjust_head = 1;
+			if (insn->imm == BPF_FUNC_tail_call) {
+				/* mark bpf_tail_call as different opcode
+				 * to avoid conditional branch in
+				 * interpeter for every normal call
+				 * and to prevent accidental JITing by
+				 * JIT compiler that doesn't support
+				 * bpf_tail_call yet
+				 */
+				insn->imm = 0;
+				insn->code |= BPF_X;
+				continue;
+			}
+
+			fn = prog->aux->ops->get_func_proto(insn->imm);
+			/* all functions that have prototype and verifier allowed
+			 * programs to call them, must be real in-kernel functions
+			 */
+			BUG_ON(!fn->func);
+			insn->imm = fn->func - __bpf_call_base;
+		}
+	}
+}
+
+
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
@@ -3328,6 +3382,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	/* program is valid, convert *(u32*)(ctx + off) accesses */
 	ret = convert_ctx_accesses(env);
 
+	if (ret == 0)
+		fixup_bpf_calls(env->prog);
+
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
 		/* verifier log exceeded user supplied buffer */

From 79741b3bdec01a8628368fbcfccc7d189ed606cb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:40 -0700
Subject: [PATCH 2/6] bpf: refactor fixup_bpf_calls()

reduce indent and make it iterate over instructions similar to
convert_ctx_accesses(). Also convert hard BUG_ON into soft verifier error.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/bpf/verifier.c | 76 ++++++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e41da6c570530..5dfa9b8111da9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3233,59 +3233,53 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	return 0;
 }
 
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- *      insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
+/* fixup insn->imm field of bpf_call instructions
 *
 * this function is called after eBPF program passed verification
 */
-static void fixup_bpf_calls(struct bpf_prog *prog)
+static int fixup_bpf_calls(struct bpf_verifier_env *env)
 {
+	struct bpf_prog *prog = env->prog;
+	struct bpf_insn *insn = prog->insnsi;
 	const struct bpf_func_proto *fn;
+	const int insn_cnt = prog->len;
 	int i;
 
-	for (i = 0; i < prog->len; i++) {
-		struct bpf_insn *insn = &prog->insnsi[i];
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code != (BPF_JMP | BPF_CALL))
+			continue;
 
-		if (insn->code == (BPF_JMP | BPF_CALL)) {
-			/* we reach here when program has bpf_call instructions
-			 * and it passed bpf_check(), means that
-			 * ops->get_func_proto must have been supplied, check it
+		if (insn->imm == BPF_FUNC_get_route_realm)
+			prog->dst_needed = 1;
+		if (insn->imm == BPF_FUNC_get_prandom_u32)
+			bpf_user_rnd_init_once();
+		if (insn->imm == BPF_FUNC_xdp_adjust_head)
+			prog->xdp_adjust_head = 1;
+		if (insn->imm == BPF_FUNC_tail_call) {
+			/* mark bpf_tail_call as different opcode to avoid
+			 * conditional branch in the interpeter for every normal
+			 * call and to prevent accidental JITing by JIT compiler
+			 * that doesn't support bpf_tail_call yet
 			 */
-			BUG_ON(!prog->aux->ops->get_func_proto);
-
-			if (insn->imm == BPF_FUNC_get_route_realm)
-				prog->dst_needed = 1;
-			if (insn->imm == BPF_FUNC_get_prandom_u32)
-				bpf_user_rnd_init_once();
-			if (insn->imm == BPF_FUNC_xdp_adjust_head)
-				prog->xdp_adjust_head = 1;
-			if (insn->imm == BPF_FUNC_tail_call) {
-				/* mark bpf_tail_call as different opcode
-				 * to avoid conditional branch in
-				 * interpeter for every normal call
-				 * and to prevent accidental JITing by
-				 * JIT compiler that doesn't support
-				 * bpf_tail_call yet
-				 */
-				insn->imm = 0;
-				insn->code |= BPF_X;
-				continue;
-			}
+			insn->imm = 0;
+			insn->code |= BPF_X;
+			continue;
+		}
 
-			fn = prog->aux->ops->get_func_proto(insn->imm);
-			/* all functions that have prototype and verifier allowed
-			 * programs to call them, must be real in-kernel functions
-			 */
-			BUG_ON(!fn->func);
-			insn->imm = fn->func - __bpf_call_base;
+		fn = prog->aux->ops->get_func_proto(insn->imm);
+		/* all functions that have prototype and verifier allowed
+		 * programs to call them, must be real in-kernel functions
+		 */
+		if (!fn->func) {
+			verbose("kernel subsystem misconfigured func %s#%d\n",
+				func_id_name(insn->imm), insn->imm);
+			return -EFAULT;
 		}
+		insn->imm = fn->func - __bpf_call_base;
 	}
-}
+	return 0;
+}
 
 static void free_states(struct bpf_verifier_env *env)
 {
@@ -3383,7 +3377,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	ret = convert_ctx_accesses(env);
 
 	if (ret == 0)
-		fixup_bpf_calls(env->prog);
+		ret = fixup_bpf_calls(env);
 
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);

From 8041902dae5299c1f194ba42d14383f734631009 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:41 -0700
Subject: [PATCH 3/6] bpf: adjust insn_aux_data when patching insns

convert_ctx_accesses() replaces a single bpf instruction with a set of
instructions. Adjust corresponding insn_aux_data while patching. It's
needed to make sure subsequent 'for(all insn)' loops have matching insn
and insn_aux_data.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
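
A worked example of the bookkeeping done by adjust_insn_aux_data() below (the
sizes are illustrative, not taken from the patch): if a 10-insn program has the
insn at off=3 replaced by a cnt=4 patch, the new program is 13 insns long. Aux
entries 0-2 stay where they are, the entry of the patched insn moves to index
off+cnt-1 = 6, the entries of old insns 4-9 land at indexes 7-12, and the
freshly inserted slots 3-5 stay zeroed, so a later walk that indexes aux data
with 'i + delta', as convert_ctx_accesses() does, still lines up.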
 kernel/bpf/verifier.c | 44 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5dfa9b8111da9..2990fda1c6a5d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3162,6 +3162,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 		insn->src_reg = 0;
 }
 
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+				u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+
+	if (cnt == 1)
+		return 0;
+	new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
+	if (!new_data)
+		return -ENOMEM;
+	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+	memcpy(new_data + off + cnt - 1, old_data + off,
+	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	env->insn_aux_data = new_data;
+	vfree(old_data);
+	return 0;
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+					    const struct bpf_insn *patch, u32 len)
+{
+	struct bpf_prog *new_prog;
+
+	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+	if (!new_prog)
+		return NULL;
+	if (adjust_insn_aux_data(env, new_prog->len, off, len))
+		return NULL;
+	return new_prog;
+}
+
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
@@ -3181,10 +3216,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		} else if (cnt) {
-			new_prog = bpf_patch_insn_single(env->prog, 0,
-							 insn_buf, cnt);
+			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
 			if (!new_prog)
 				return -ENOMEM;
+
 			env->prog = new_prog;
 			delta += cnt - 1;
 		}
@@ -3209,7 +3244,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		else
 			continue;
 
-		if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
+		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
 
 		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
@@ -3218,8 +3253,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			return -EINVAL;
 		}
 
-		new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
-						 cnt);
+		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 		if (!new_prog)
 			return -ENOMEM;

From 81ed18ab3098b6519274545e80a29caacb77d160 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:42 -0700
Subject: [PATCH 4/6] bpf: add helper inlining infra and optimize map_array lookup

Optimize bpf_call -> bpf_map_lookup_elem() -> array_map_lookup_elem()
into a sequence of bpf instructions.
When the JIT is on, that sequence of bpf instructions becomes a sequence
of native cpu instructions with significantly faster performance than an
indirect call and two functions' prologue/epilogue.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
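
For orientation before reading the diff: the sequence that array_map_gen_lookup()
(added below) emits open-codes roughly the following C logic of
array_map_lookup_elem() (a sketch of the intent only, not literal kernel code;
the multiply by elem_size becomes a shift when the size is a power of two):

	/* sketch: what the inlined instruction sequence computes */
	static void *inlined_array_lookup(struct bpf_array *array, u32 index)
	{
		if (index >= array->map.max_entries)
			return NULL;	/* BPF_JGE taken -> BPF_MOV64_IMM(ret, 0) */
		return array->value + array->elem_size * index;
	}

With the JIT enabled this handful of instructions runs as native code, which is
where the claimed speedup comes from.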
 include/linux/bpf.h          |  1 +
 include/linux/bpf_verifier.h |  5 ++++-
 include/linux/filter.h       | 10 ++++++++++
 kernel/bpf/arraymap.c        | 29 +++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        | 36 +++++++++++++++++++++++++++++++++---
 5 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 909fc033173a7..da8c64ca8dc99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -35,6 +35,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
+	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 };
 
 struct bpf_map {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index a13b031dc6b80..5efb4db44e1ef 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -66,7 +66,10 @@ struct bpf_verifier_state_list {
 };
 
 struct bpf_insn_aux_data {
-	enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
+	union {
+		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
+		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
+	};
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fbf7b39e81035..dffa072b7b79e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -693,6 +693,11 @@ static inline bool bpf_jit_is_ebpf(void)
 # endif
 }
 
+static inline bool ebpf_jit_enabled(void)
+{
+	return bpf_jit_enable && bpf_jit_is_ebpf();
+}
+
 static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
 {
 	return fp->jited && bpf_jit_is_ebpf();
@@ -753,6 +758,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp);
 
 #else /* CONFIG_BPF_JIT */
 
+static inline bool ebpf_jit_enabled(void)
+{
+	return false;
+}
+
 static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
 {
 	return false;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 6b6f41f0b2116..bcf9955fac956 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016,2017 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -113,6 +114,33 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	return array->value + array->elem_size * index;
 }
 
+/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
+static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct bpf_insn *insn = insn_buf;
+	u32 elem_size = array->elem_size;
+	const int ret = BPF_REG_0;
+	const int map_ptr = BPF_REG_1;
+	const int index = BPF_REG_2;
+
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
+	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, array->map.max_entries,
+			      elem_size == 1 ? 2 : 3);
+	if (elem_size == 1) {
+		/* nop */
+	} else if (is_power_of_2(elem_size)) {
+		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
+	} else {
+		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
+	}
+	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
+	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+	*insn++ = BPF_MOV64_IMM(ret, 0);
+	return insn - insn_buf;
+}
+
 /* Called from eBPF program */
 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 {
@@ -267,6 +295,7 @@ static const struct bpf_map_ops array_ops = {
 	.map_lookup_elem = array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
+	.map_gen_lookup = array_map_gen_lookup,
 };
 
 static struct bpf_map_type_list array_type __ro_after_init = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2990fda1c6a5d..90bf46787603a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1273,7 +1273,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 	}
 }
 
-static int check_call(struct bpf_verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
@@ -1369,6 +1369,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
 		}
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].id = ++env->id_gen;
+		env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
 	} else {
 		verbose("unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -2940,7 +2941,7 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
-				err = check_call(env, insn->imm);
+				err = check_call(env, insn->imm, insn_idx);
 				if (err)
 					return err;
@@ -3268,6 +3269,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 }
 
 /* fixup insn->imm field of bpf_call instructions
+ * and inline eligible helpers as explicit sequence of BPF instructions
  *
  * this function is called after eBPF program passed verification
 */
@@ -3277,7 +3279,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	struct bpf_insn *insn = prog->insnsi;
 	const struct bpf_func_proto *fn;
 	const int insn_cnt = prog->len;
-	int i;
+	struct bpf_insn insn_buf[16];
+	struct bpf_prog *new_prog;
+	struct bpf_map *map_ptr;
+	int i, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		if (insn->code != (BPF_JMP | BPF_CALL))
@@ -3300,6 +3305,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
+			map_ptr = env->insn_aux_data[i + delta].map_ptr;
+			if (!map_ptr->ops->map_gen_lookup)
+				goto patch_call_imm;
+
+			cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
+			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+				verbose("bpf verifier is misconfigured\n");
+				return -EINVAL;
+			}
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+						       cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+
+			/* keep walking new program and skip insns we just inserted */
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			continue;
+		}
+
+patch_call_imm:
 		fn = prog->aux->ops->get_func_proto(insn->imm);
 		/* all functions that have prototype and verifier allowed
 		 * programs to call them, must be real in-kernel functions
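
One detail the inlining relies on but does not spell out: by the eBPF calling
convention, the patched BPF_CALL site already has R1 = map pointer and R2 =
pointer to the key, and expects the helper's result in R0. The instructions
returned by map_gen_lookup() therefore consume R1/R2 in place and leave either
the element address or 0 (NULL) in R0, so none of the surrounding instructions
need to be rewritten when the call is replaced.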

From 9015d2f5953590e8273392b44c2b0f864350b427 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:43 -0700
Subject: [PATCH 5/6] bpf: inline htab_map_lookup_elem()

Optimize:
bpf_call
  bpf_map_lookup_elem
    map->ops->map_lookup_elem
      htab_map_lookup_elem
        __htab_map_lookup_elem
into:
bpf_call
  __htab_map_lookup_elem
to improve performance of JITed programs.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/bpf/hashtab.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c98..000153acb6d57 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -419,7 +419,11 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
 	return NULL;
 }
 
-/* Called from syscall or from eBPF program */
+/* Called from syscall or from eBPF program directly, so
+ * arguments have to match bpf_map_lookup_elem() exactly.
+ * The return value is adjusted by BPF instructions
+ * in htab_map_gen_lookup().
+ */
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
@@ -451,6 +455,30 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
 	return NULL;
 }
 
+/* inline bpf_map_lookup_elem() call.
+ * Instead of:
+ * bpf_prog
+ *   bpf_map_lookup_elem
+ *     map->ops->map_lookup_elem
+ *       htab_map_lookup_elem
+ *         __htab_map_lookup_elem
+ * do:
+ * bpf_prog
+ *   __htab_map_lookup_elem
+ */
+static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+	const int ret = BPF_REG_0;
+
+	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
+				offsetof(struct htab_elem, key) +
+				round_up(map->key_size, 8));
+	return insn - insn_buf;
+}
+
 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct htab_elem *l = __htab_map_lookup_elem(map, key);
@@ -1062,6 +1090,7 @@ static const struct bpf_map_ops htab_ops = {
 	.map_lookup_elem = htab_map_lookup_elem,
 	.map_update_elem = htab_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
+	.map_gen_lookup = htab_map_gen_lookup,
 };
 
 static struct bpf_map_type_list htab_type __ro_after_init = {
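
The three instructions emitted by htab_map_gen_lookup() compute roughly the
tail of htab_map_lookup_elem() shown above (again a sketch of the intent, not
literal kernel code):

	/* sketch: what the three emitted insns compute */
	static void *inlined_htab_lookup(struct bpf_map *map, void *key)
	{
		struct htab_elem *l = __htab_map_lookup_elem(map, key);

		if (!l)
			return NULL;	/* BPF_JMP_IMM(BPF_JEQ, ret, 0, 1) */
		return (void *)l + offsetof(struct htab_elem, key) +
		       round_up(map->key_size, 8);	/* BPF_ALU64_IMM(BPF_ADD, ...) */
	}

i.e. the address of the value stored right after the 8-byte aligned key inside
struct htab_elem.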

From 95ff141e52f84f476fcde50560f42d4f118539c0 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 15 Mar 2017 18:26:44 -0700
Subject: [PATCH 6/6] samples/bpf: add map_lookup microbenchmark

$ map_perf_test 128
speed of HASH bpf_map_lookup_elem() in lookups per second
	w/o JIT		w/JIT
before	46M		58M
after	42M		74M

perf report
before:
    54.23%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    14.24%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     8.84%  map_perf_test  [kernel.kallsyms]  [k] htab_map_lookup_elem
     5.93%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     2.30%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.49%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    60.03%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    18.07%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     2.91%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.94%  map_perf_test  [kernel.kallsyms]  [k] _einittext
     1.90%  map_perf_test  [kernel.kallsyms]  [k] __audit_syscall_exit
     1.72%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

Notice that bpf_map_lookup_elem() and htab_map_lookup_elem() are trivial
functions, yet they take a sizeable amount of cpu time.
htab_map_gen_lookup() removes bpf_map_lookup_elem() and converts
htab_map_lookup_elem() into three BPF insns, which causes the cpu time
spent in bpf_prog_da4fc6a3f41761a2() to increase slightly.

$ map_perf_test 256
speed of ARRAY bpf_map_lookup_elem() in lookups per second
	w/o JIT		w/JIT
before	97M		174M
after	64M		280M

before:
    37.33%  map_perf_test  [kernel.kallsyms]  [k] array_map_lookup_elem
    13.95%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     6.54%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     4.57%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    32.86%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     6.54%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

array_map_gen_lookup() removes calls to array_map_lookup_elem() and
bpf_map_lookup_elem() and replaces them with 7 bpf insns.
The performance without JIT is slower, since executing extra insns in the
interpreter is slower than running native C code, but with JIT the
performance gains are obvious, since native C->x86 code is replaced with
fewer bpf->x86 instructions.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 samples/bpf/map_perf_test_kern.c | 33 ++++++++++++++++++++++++++++++++
 samples/bpf/map_perf_test_user.c | 32 +++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index a91872a97742a..9da2a3441b0a2 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
 	.map_flags = BPF_F_NO_PREALLOC,
 };
 
+struct bpf_map_def SEC("maps") array_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = MAX_ENTRIES,
+};
+
 SEC("kprobe/sys_getuid")
 int stress_hmap(struct pt_regs *ctx)
 {
@@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
 	return 0;
 }
 
+SEC("kprobe/sys_getpgid")
+int stress_hash_map_lookup(struct pt_regs *ctx)
+{
+	u32 key = 1, i;
+	long *value;
+
+#pragma clang loop unroll(full)
+	for (i = 0; i < 64; ++i)
+		value = bpf_map_lookup_elem(&hash_map, &key);
+
+	return 0;
+}
+
+SEC("kprobe/sys_getpgrp")
+int stress_array_map_lookup(struct pt_regs *ctx)
+{
+	u32 key = 1, i;
+	long *value;
+
+#pragma clang loop unroll(full)
+	for (i = 0; i < 64; ++i)
+		value = bpf_map_lookup_elem(&array_map, &key);
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
 u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 680260a91f50c..e29ff318a7936 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -38,6 +38,8 @@ static __u64 time_get_ns(void)
 #define LRU_HASH_PREALLOC	(1 << 4)
 #define PERCPU_LRU_HASH_PREALLOC	(1 << 5)
 #define LPM_KMALLOC		(1 << 6)
+#define HASH_LOOKUP		(1 << 7)
+#define ARRAY_LOOKUP		(1 << 8)
 
 static int test_flags = ~0;
 
@@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu)
 	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
+static void test_hash_lookup(int cpu)
+{
+	__u64 start_time;
+	int i;
+
+	start_time = time_get_ns();
+	for (i = 0; i < MAX_CNT; i++)
+		syscall(__NR_getpgid, 0);
+	printf("%d:hash_lookup %lld lookups per sec\n",
+	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
+
+static void test_array_lookup(int cpu)
+{
+	__u64 start_time;
+	int i;
+
+	start_time = time_get_ns();
+	for (i = 0; i < MAX_CNT; i++)
+		syscall(__NR_getpgrp, 0);
+	printf("%d:array_lookup %lld lookups per sec\n",
+	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
+
 static void loop(int cpu)
 {
 	cpu_set_t cpuset;
@@ -153,6 +179,12 @@ static void loop(int cpu)
 
 	if (test_flags & LPM_KMALLOC)
 		test_lpm_kmalloc(cpu);
+
+	if (test_flags & HASH_LOOKUP)
+		test_hash_lookup(cpu);
+
+	if (test_flags & ARRAY_LOOKUP)
+		test_array_lookup(cpu);
 }
 
 static void run_perf_test(int tasks)
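
A usage note, inferred from the flag definitions and the invocations quoted in
the commit message rather than stated explicitly in the patch: the first
argument of map_perf_test is treated as a bitmask of test_flags, so

	$ ./map_perf_test 128	# 1 << 7: HASH_LOOKUP only
	$ ./map_perf_test 256	# 1 << 8: ARRAY_LOOKUP only

exercise just the new lookup tests, and each reported figure multiplies the
syscall count by 64 because the kprobe programs above unroll 64
bpf_map_lookup_elem() calls per getpgid()/getpgrp() syscall.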