diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 521933bc15fea..b677dcd0b77af 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -638,6 +638,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -652,7 +653,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_strncmp.o \ $(OUTPUT)/bench_bpf_hashmap_full_update.o \ $(OUTPUT)/bench_local_storage.o \ - $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o + $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \ + $(OUTPUT)/bench_bpf_hashmap_lookup.o \ + # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index c1f20a1474624..0b2a53bb84609 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -16,6 +16,7 @@ struct env env = { .warmup_sec = 1, .duration_sec = 5, .affinity = false, + .quiet = false, .consumer_cnt = 1, .producer_cnt = 1, }; @@ -262,6 +263,7 @@ static const struct argp_option opts[] = { { "consumers", 'c', "NUM", 0, "Number of consumer threads"}, { "verbose", 'v', NULL, 0, "Verbose debug output"}, { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"}, + { "quiet", 'q', NULL, 0, "Be more quiet"}, { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0, "Set of CPUs for producer threads; implies --affinity"}, { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0, @@ -275,6 +277,7 @@ extern struct argp bench_bpf_loop_argp; extern struct argp bench_local_storage_argp; extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; +extern struct argp bench_hashmap_lookup_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -284,13 +287,15 @@ static const struct argp_child bench_parsers[] = { { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, { &bench_local_storage_rcu_tasks_trace_argp, 0, "local_storage RCU Tasks Trace slowdown benchmark", 0 }, + { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 }, {}, }; +/* Make pos_args global, so that we can run argp_parse twice, if necessary */ +static int pos_args; + static error_t parse_arg(int key, char *arg, struct argp_state *state) { - static int pos_args; - switch (key) { case 'v': env.verbose = true; @@ -329,6 +334,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'a': env.affinity = true; break; + case 'q': + env.quiet = true; + break; case ARG_PROD_AFFINITY_SET: env.affinity = true; if (parse_num_list(arg, &env.prod_cpus.cpus, @@ -359,7 +367,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void parse_cmdline_args(int argc, char **argv) +static void parse_cmdline_args_init(int argc, char **argv) { static const struct argp argp = { .options = opts, @@ -369,9 +377,25 @@ static void parse_cmdline_args(int argc, char **argv) }; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) exit(1); - if (!env.list && !env.bench_name) { - argp_help(&argp, stderr, ARGP_HELP_DOC, "bench"); - exit(1); +} + +static void parse_cmdline_args_final(int argc, char **argv) +{ + struct argp_child bench_parsers[2] = {}; + const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + .children = bench_parsers, + }; + + /* Parse arguments the second time with the correct set of parsers */ + if (bench->argp) { + bench_parsers[0].argp = bench->argp; + bench_parsers[0].header = bench->name; + pos_args = 0; + if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) + exit(1); } } @@ -490,6 +514,7 @@ extern const struct bench bench_local_storage_cache_seq_get; extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; +extern const struct bench bench_bpf_hashmap_lookup; static const struct bench *benchs[] = { &bench_count_global, @@ -529,17 +554,17 @@ static const struct bench *benchs[] = { &bench_local_storage_cache_interleaved_get, &bench_local_storage_cache_hashmap_control, &bench_local_storage_tasks_trace, + &bench_bpf_hashmap_lookup, }; -static void setup_benchmark() +static void find_benchmark(void) { - int i, err; + int i; if (!env.bench_name) { fprintf(stderr, "benchmark name is not specified\n"); exit(1); } - for (i = 0; i < ARRAY_SIZE(benchs); i++) { if (strcmp(benchs[i]->name, env.bench_name) == 0) { bench = benchs[i]; @@ -550,8 +575,14 @@ static void setup_benchmark() fprintf(stderr, "benchmark '%s' not found\n", env.bench_name); exit(1); } +} - printf("Setting up benchmark '%s'...\n", bench->name); +static void setup_benchmark(void) +{ + int i, err; + + if (!env.quiet) + printf("Setting up benchmark '%s'...\n", bench->name); state.producers = calloc(env.producer_cnt, sizeof(*state.producers)); state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers)); @@ -597,7 +628,8 @@ static void setup_benchmark() next_cpu(&env.prod_cpus)); } - printf("Benchmark '%s' started.\n", bench->name); + if (!env.quiet) + printf("Benchmark '%s' started.\n", bench->name); } static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER; @@ -621,7 +653,7 @@ static void collect_measurements(long delta_ns) { int main(int argc, char **argv) { - parse_cmdline_args(argc, argv); + parse_cmdline_args_init(argc, argv); if (env.list) { int i; @@ -633,6 +665,9 @@ int main(int argc, char **argv) return 0; } + find_benchmark(); + parse_cmdline_args_final(argc, argv); + setup_benchmark(); setup_timer(); diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index d748255877e2d..402729c6a3ac5 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -24,6 +24,7 @@ struct env { bool verbose; bool list; bool affinity; + bool quiet; int consumer_cnt; int producer_cnt; struct cpu_set prod_cpus; @@ -47,6 +48,7 @@ struct bench_res { struct bench { const char *name; + const struct argp *argp; void (*validate)(void); void (*setup)(void); void *(*producer_thread)(void *ctx); diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 5bcb8a8cdeb20..7c8ccc1083138 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -428,6 +428,7 @@ static void *consumer(void *input) const struct bench bench_bloom_lookup = { .name = "bloom-lookup", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_lookup_setup, .producer_thread = producer, @@ -439,6 +440,7 @@ const struct bench bench_bloom_lookup = { const struct bench bench_bloom_update = { .name = "bloom-update", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_update_setup, .producer_thread = producer, @@ -450,6 +452,7 @@ const struct bench bench_bloom_update = { const struct bench bench_bloom_false_positive = { .name = "bloom-false-positive", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = false_positive_setup, .producer_thread = producer, @@ -461,6 +464,7 @@ const struct bench bench_bloom_false_positive = { const struct bench bench_hashmap_without_bloom = { .name = "hashmap-without-bloom", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = hashmap_no_bloom_setup, .producer_thread = producer, @@ -472,6 +476,7 @@ const struct bench bench_hashmap_without_bloom = { const struct bench bench_hashmap_with_bloom = { .name = "hashmap-with-bloom", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = hashmap_with_bloom_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c index cec51e0ff4b8a..75abe8137b6cf 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Bytedance */ -#include #include "bench.h" #include "bpf_hashmap_full_update_bench.skel.h" #include "bpf_util.h" @@ -68,7 +67,7 @@ static void setup(void) bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); } -void hashmap_report_final(struct bench_res res[], int res_cnt) +static void hashmap_report_final(struct bench_res res[], int res_cnt) { unsigned int nr_cpus = bpf_num_possible_cpus(); int i; @@ -85,7 +84,7 @@ void hashmap_report_final(struct bench_res res[], int res_cnt) } const struct bench bench_bpf_hashmap_full_update = { - .name = "bpf-hashmap-ful-update", + .name = "bpf-hashmap-full-update", .validate = validate, .setup = setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c new file mode 100644 index 0000000000000..8dbb02f75cffc --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include +#include +#include "bench.h" +#include "bpf_hashmap_lookup.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_lookup *skel; +} ctx; + +/* only available to kernel, so define it here */ +#define BPF_MAX_LOOPS (1<<23) + +#define MAX_KEY_SIZE 1024 /* the size of the key map */ + +static struct { + __u32 key_size; + __u32 map_flags; + __u32 max_entries; + __u32 nr_entries; + __u32 nr_loops; +} args = { + .key_size = 4, + .map_flags = 0, + .max_entries = 1000, + .nr_entries = 500, + .nr_loops = 1000000, +}; + +enum { + ARG_KEY_SIZE = 8001, + ARG_MAP_FLAGS, + ARG_MAX_ENTRIES, + ARG_NR_ENTRIES, + ARG_NR_LOOPS, +}; + +static const struct argp_option opts[] = { + { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0, + "The hashmap key size (max 1024)"}, + { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0, + "The hashmap flags passed to BPF_MAP_CREATE"}, + { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0, + "The hashmap max entries"}, + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "The number of entries to insert/lookup"}, + { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0, + "The number of loops for the benchmark"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_KEY_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_KEY_SIZE) { + fprintf(stderr, "invalid key_size"); + argp_usage(state); + } + args.key_size = ret; + break; + case ARG_MAP_FLAGS: + ret = strtol(arg, NULL, 0); + if (ret < 0 || ret > UINT_MAX) { + fprintf(stderr, "invalid map_flags"); + argp_usage(state); + } + args.map_flags = ret; + break; + case ARG_MAX_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid max_entries"); + argp_usage(state); + } + args.max_entries = ret; + break; + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_entries"); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_LOOPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > BPF_MAX_LOOPS) { + fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n", + ret, BPF_MAX_LOOPS); + argp_usage(state); + } + args.nr_loops = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_hashmap_lookup_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_entries > args.max_entries) { + fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n", + args.max_entries, args.nr_entries); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static inline void patch_key(u32 i, u32 *key) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *key = i + 1; +#else + *key = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random */ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd; + int ret; + int i; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_lookup__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries); + bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size); + bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8); + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags); + + ctx.skel->bss->nr_entries = args.nr_entries; + ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries; + + if (args.key_size > 4) { + for (i = 1; i < args.key_size/4; i++) + ctx.skel->bss->key[i] = 2654435761 * i; + } + + ret = bpf_hashmap_lookup__load(ctx.skel); + if (ret) { + bpf_hashmap_lookup__destroy(ctx.skel); + fprintf(stderr, "failed to load map: %s", strerror(-ret)); + exit(1); + } + + /* fill in the hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + for (u64 i = 0; i < args.nr_entries; i++) { + patch_key(i, ctx.skel->bss->key); + bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static inline double events_from_time(u64 time) +{ + if (time) + return args.nr_loops * 1000000000llu / time / 1000000.0L; + + return 0; +} + +static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time) +{ + int i, n = 0; + + *events_mean = 0; + *events_stddev = 0; + *mean_time = 0; + + for (i = 0; i < 32; i++) { + if (!times[i]) + break; + *mean_time += times[i]; + *events_mean += events_from_time(times[i]); + n += 1; + } + if (!n) + return 0; + + *mean_time /= n; + *events_mean /= n; + + if (n > 1) { + for (i = 0; i < n; i++) { + double events_i = *events_mean - events_from_time(times[i]); + *events_stddev += events_i * events_i / (n - 1); + } + *events_stddev = sqrt(*events_stddev); + } + + return n; +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + double events_mean, events_stddev; + u64 mean_time; + int i, n; + + for (i = 0; i < nr_cpus; i++) { + n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean, + &events_stddev, &mean_time); + if (n == 0) + continue; + + if (env.quiet) { + /* we expect only one cpu to be present */ + if (env.affinity) + printf("%.3lf\n", events_mean); + else + printf("cpu%02d %.3lf\n", i, events_mean); + } else { + printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec" + " (approximated from %d samples of ~%lums)\n", + i, events_mean, 2*events_stddev, + n, mean_time / 1000000); + } + } +} + +const struct bench bench_bpf_hashmap_lookup = { + .name = "bpf-hashmap-lookup", + .argp = &bench_hashmap_lookup_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c index d0a6572bfab61..d8a0394e10b15 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -95,6 +95,7 @@ static void setup(void) const struct bench bench_bpf_loop = { .name = "bpf-loop", + .argp = &bench_bpf_loop_argp, .validate = validate, .setup = setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c index 5a378c84e81f6..d4b2817306d42 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -255,6 +255,7 @@ static void *producer(void *input) */ const struct bench bench_local_storage_cache_seq_get = { .name = "local-storage-cache-seq-get", + .argp = &bench_local_storage_argp, .validate = validate, .setup = local_storage_cache_get_setup, .producer_thread = producer, @@ -266,6 +267,7 @@ const struct bench bench_local_storage_cache_seq_get = { const struct bench bench_local_storage_cache_interleaved_get = { .name = "local-storage-cache-int-get", + .argp = &bench_local_storage_argp, .validate = validate, .setup = local_storage_cache_get_interleaved_setup, .producer_thread = producer, @@ -277,6 +279,7 @@ const struct bench bench_local_storage_cache_interleaved_get = { const struct bench bench_local_storage_cache_hashmap_control = { .name = "local-storage-cache-hashmap-control", + .argp = &bench_local_storage_argp, .validate = validate, .setup = hashmap_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c index 43f109d931302..d5eb5587f2aa9 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -12,17 +12,14 @@ static struct { __u32 nr_procs; __u32 kthread_pid; - bool quiet; } args = { .nr_procs = 1000, .kthread_pid = 0, - .quiet = false, }; enum { ARG_NR_PROCS = 7000, ARG_KTHREAD_PID = 7001, - ARG_QUIET = 7002, }; static const struct argp_option opts[] = { @@ -30,8 +27,6 @@ static const struct argp_option opts[] = { "Set number of user processes to spin up"}, { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, "Pid of rcu_tasks_trace kthread for ticks tracking"}, - { "quiet", ARG_QUIET, "{0,1}", 0, - "If true, don't report progress"}, {}, }; @@ -56,14 +51,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } args.kthread_pid = ret; break; - case ARG_QUIET: - ret = strtol(arg, NULL, 10); - if (ret < 0 || ret > 1) { - fprintf(stderr, "invalid quiet %ld\n", ret); - argp_usage(state); - } - args.quiet = ret; - break; break; default: return ARGP_ERR_UNKNOWN; @@ -230,7 +217,7 @@ static void report_progress(int iter, struct bench_res *res, long delta_ns) exit(1); } - if (args.quiet) + if (env.quiet) return; printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", @@ -271,6 +258,7 @@ static void report_final(struct bench_res res[], int res_cnt) */ const struct bench bench_local_storage_tasks_trace = { .name = "local-storage-tasks-trace", + .argp = &bench_local_storage_rcu_tasks_trace_argp, .validate = validate, .setup = local_storage_tasks_trace_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index c2554f9695ffb..fc91fdac4faa5 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -518,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input) const struct bench bench_rb_libbpf = { .name = "rb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -529,6 +530,7 @@ const struct bench bench_rb_libbpf = { const struct bench bench_rb_custom = { .name = "rb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_custom_setup, .producer_thread = bufs_sample_producer, @@ -540,6 +542,7 @@ const struct bench bench_rb_custom = { const struct bench bench_pb_libbpf = { .name = "pb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -551,6 +554,7 @@ const struct bench bench_pb_libbpf = { const struct bench bench_pb_custom = { .name = "pb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c index 494b591c02897..d3fad2ba6916f 100644 --- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -140,6 +140,7 @@ static void strncmp_measure(struct bench_res *res) const struct bench bench_strncmp_no_helper = { .name = "strncmp-no-helper", + .argp = &bench_strncmp_argp, .validate = strncmp_validate, .setup = strncmp_no_helper_setup, .producer_thread = strncmp_producer, @@ -151,6 +152,7 @@ const struct bench bench_strncmp_no_helper = { const struct bench bench_strncmp_helper = { .name = "strncmp-helper", + .argp = &bench_strncmp_argp, .validate = strncmp_validate, .setup = strncmp_helper_setup, .producer_thread = strncmp_producer, diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh index 1e2de838f9fa9..cd2efd3fdef3a 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -6,6 +6,6 @@ source ./benchs/run_common.sh set -eufo pipefail nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` -summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update) printf "$summary" printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh index 5dac1f02892cf..3e8a969f20967 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -8,4 +8,4 @@ if [ -z $kthread_pid ]; then exit 1 fi -./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c new file mode 100644 index 0000000000000..1eb74ddca4144 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include "vmlinux.h" + +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); +} hash_map_bench SEC(".maps"); + +/* The number of slots to store times */ +#define NR_SLOTS 32 +#define NR_CPUS 256 +#define CPU_MASK (NR_CPUS-1) + +/* Configured by userspace */ +u64 nr_entries; +u64 nr_loops; +u32 __attribute__((__aligned__(8))) key[NR_CPUS]; + +/* Filled by us */ +u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS]; +u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS]; + +static inline void patch_key(u32 i) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + key[0] = i + 1; +#else + key[0] = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random and is configured by userspace */ +} + +static int lookup_callback(__u32 index, u32 *unused) +{ + patch_key(index); + return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1; +} + +static int loop_lookup_callback(__u32 index, u32 *unused) +{ + return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 times_index; + u64 start_time; + + times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS; + start_time = bpf_ktime_get_ns(); + bpf_loop(nr_loops, loop_lookup_callback, NULL, 0); + percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time; + percpu_times_index[cpu & CPU_MASK] += 1; + return 0; +}