From 4db98ab445c58bd26c303ef7a10ccd8f049acc22 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:13 +0000
Subject: [PATCH 1/7] selftest/bpf/benchs: Fix a typo in
 bpf_hashmap_full_update

To call the bpf_hashmap_full_update benchmark, one should say:

    bench bpf-hashmap-ful-update

The patch adds a missing 'l' to the benchmark name.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-2-aspsk@isovalent.com
---
 .../selftests/bpf/benchs/bench_bpf_hashmap_full_update.c        | 2 +-
 .../selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index cec51e0ff4b8a..44706acf632a7 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -85,7 +85,7 @@ void hashmap_report_final(struct bench_res res[], int res_cnt)
 }
 
 const struct bench bench_bpf_hashmap_full_update = {
-	.name = "bpf-hashmap-ful-update",
+	.name = "bpf-hashmap-full-update",
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
index 1e2de838f9fa9..cd2efd3fdef3a 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -6,6 +6,6 @@ source ./benchs/run_common.sh
 set -eufo pipefail
 
 nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
-summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update)
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update)
 printf "$summary"
 printf "\n"

From 2f1c59637fb17dbb2a725c3bd48e4d9d3809df89 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:14 +0000
Subject: [PATCH 2/7] selftest/bpf/benchs: Make a function static in
 bpf_hashmap_full_update

The hashmap_report_final callback function defined in the
benchs/bench_bpf_hashmap_full_update.c file should be static.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-3-aspsk@isovalent.com
---
 .../selftests/bpf/benchs/bench_bpf_hashmap_full_update.c        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index 44706acf632a7..67f76415a362a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -68,7 +68,7 @@ static void setup(void)
 		bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
 }
 
-void hashmap_report_final(struct bench_res res[], int res_cnt)
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	int i;

From 22ff7aeaa9e3d0533df613da3500db1ecf452253 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:15 +0000
Subject: [PATCH 3/7] selftest/bpf/benchs: Enhance argp parsing

To parse command line the bench utility uses the argp_parse() function. This
function takes as an argument a parent 'struct argp' structure which defines
common command line options and an array of children 'struct argp' structures
which defines additional command line options for particular benchmarks. This
implementation doesn't allow benchmarks to share option names, e.g., if two
benchmarks want to use, say, the --option option, then only one of them will
succeed (the first one encountered in the array).  This will be convenient if
same option names could be used in different benchmarks (with the same
semantics, e.g., --nr_loops=N).

Fix this by calling the argp_parse() function twice. The first call is the same
as it was before, with all children argps, and helps to find the benchmark name
and to print a combined help message if anything is wrong.  Given the name, we
can call the argp_parse the second time, but now the children array points only
to a correct benchmark thus always calling the correct parsers. (If there's no
a specific list of arguments, then only one call to argp_parse will be done.)

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-4-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c           | 44 ++++++++++++++-----
 tools/testing/selftests/bpf/bench.h           |  1 +
 .../bpf/benchs/bench_bloom_filter_map.c       |  5 +++
 .../selftests/bpf/benchs/bench_bpf_loop.c     |  1 +
 .../bpf/benchs/bench_local_storage.c          |  3 ++
 .../bench_local_storage_rcu_tasks_trace.c     |  1 +
 .../selftests/bpf/benchs/bench_ringbufs.c     |  4 ++
 .../selftests/bpf/benchs/bench_strncmp.c      |  2 +
 8 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index c1f20a1474624..12c3b3ab84aab 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -287,10 +287,11 @@ static const struct argp_child bench_parsers[] = {
 	{},
 };
 
+/* Make pos_args global, so that we can run argp_parse twice, if necessary */
+static int pos_args;
+
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
-	static int pos_args;
-
 	switch (key) {
 	case 'v':
 		env.verbose = true;
@@ -359,7 +360,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static void parse_cmdline_args(int argc, char **argv)
+static void parse_cmdline_args_init(int argc, char **argv)
 {
 	static const struct argp argp = {
 		.options = opts,
@@ -369,9 +370,25 @@ static void parse_cmdline_args(int argc, char **argv)
 	};
 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
 		exit(1);
-	if (!env.list && !env.bench_name) {
-		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
-		exit(1);
+}
+
+static void parse_cmdline_args_final(int argc, char **argv)
+{
+	struct argp_child bench_parsers[2] = {};
+	const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.children = bench_parsers,
+	};
+
+	/* Parse arguments the second time with the correct set of parsers */
+	if (bench->argp) {
+		bench_parsers[0].argp = bench->argp;
+		bench_parsers[0].header = bench->name;
+		pos_args = 0;
+		if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+			exit(1);
 	}
 }
 
@@ -531,15 +548,14 @@ static const struct bench *benchs[] = {
 	&bench_local_storage_tasks_trace,
 };
 
-static void setup_benchmark()
+static void find_benchmark(void)
 {
-	int i, err;
+	int i;
 
 	if (!env.bench_name) {
 		fprintf(stderr, "benchmark name is not specified\n");
 		exit(1);
 	}
-
 	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
 		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
 			bench = benchs[i];
@@ -550,6 +566,11 @@ static void setup_benchmark()
 		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
 		exit(1);
 	}
+}
+
+static void setup_benchmark(void)
+{
+	int i, err;
 
 	printf("Setting up benchmark '%s'...\n", bench->name);
 
@@ -621,7 +642,7 @@ static void collect_measurements(long delta_ns) {
 
 int main(int argc, char **argv)
 {
-	parse_cmdline_args(argc, argv);
+	parse_cmdline_args_init(argc, argv);
 
 	if (env.list) {
 		int i;
@@ -633,6 +654,9 @@ int main(int argc, char **argv)
 		return 0;
 	}
 
+	find_benchmark();
+	parse_cmdline_args_final(argc, argv);
+
 	setup_benchmark();
 
 	setup_timer();
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index d748255877e2d..3c8afa0131a3a 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -47,6 +47,7 @@ struct bench_res {
 
 struct bench {
 	const char *name;
+	const struct argp *argp;
 	void (*validate)(void);
 	void (*setup)(void);
 	void *(*producer_thread)(void *ctx);
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
index 5bcb8a8cdeb20..7c8ccc1083138 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -428,6 +428,7 @@ static void *consumer(void *input)
 
 const struct bench bench_bloom_lookup = {
 	.name = "bloom-lookup",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = bloom_lookup_setup,
 	.producer_thread = producer,
@@ -439,6 +440,7 @@ const struct bench bench_bloom_lookup = {
 
 const struct bench bench_bloom_update = {
 	.name = "bloom-update",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = bloom_update_setup,
 	.producer_thread = producer,
@@ -450,6 +452,7 @@ const struct bench bench_bloom_update = {
 
 const struct bench bench_bloom_false_positive = {
 	.name = "bloom-false-positive",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = false_positive_setup,
 	.producer_thread = producer,
@@ -461,6 +464,7 @@ const struct bench bench_bloom_false_positive = {
 
 const struct bench bench_hashmap_without_bloom = {
 	.name = "hashmap-without-bloom",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = hashmap_no_bloom_setup,
 	.producer_thread = producer,
@@ -472,6 +476,7 @@ const struct bench bench_hashmap_without_bloom = {
 
 const struct bench bench_hashmap_with_bloom = {
 	.name = "hashmap-with-bloom",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = hashmap_with_bloom_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
index d0a6572bfab61..d8a0394e10b15 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -95,6 +95,7 @@ static void setup(void)
 
 const struct bench bench_bpf_loop = {
 	.name = "bpf-loop",
+	.argp = &bench_bpf_loop_argp,
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
index 5a378c84e81f6..d4b2817306d42 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -255,6 +255,7 @@ static void *producer(void *input)
  */
 const struct bench bench_local_storage_cache_seq_get = {
 	.name = "local-storage-cache-seq-get",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = local_storage_cache_get_setup,
 	.producer_thread = producer,
@@ -266,6 +267,7 @@ const struct bench bench_local_storage_cache_seq_get = {
 
 const struct bench bench_local_storage_cache_interleaved_get = {
 	.name = "local-storage-cache-int-get",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = local_storage_cache_get_interleaved_setup,
 	.producer_thread = producer,
@@ -277,6 +279,7 @@ const struct bench bench_local_storage_cache_interleaved_get = {
 
 const struct bench bench_local_storage_cache_hashmap_control = {
 	.name = "local-storage-cache-hashmap-control",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = hashmap_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
index 43f109d931302..4f9401ecf09ce 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -271,6 +271,7 @@ static void report_final(struct bench_res res[], int res_cnt)
  */
 const struct bench bench_local_storage_tasks_trace = {
 	.name = "local-storage-tasks-trace",
+	.argp = &bench_local_storage_rcu_tasks_trace_argp,
 	.validate = validate,
 	.setup = local_storage_tasks_trace_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index c2554f9695ffb..fc91fdac4faa5 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -518,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input)
 
 const struct bench bench_rb_libbpf = {
 	.name = "rb-libbpf",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = ringbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
@@ -529,6 +530,7 @@ const struct bench bench_rb_libbpf = {
 
 const struct bench bench_rb_custom = {
 	.name = "rb-custom",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = ringbuf_custom_setup,
 	.producer_thread = bufs_sample_producer,
@@ -540,6 +542,7 @@ const struct bench bench_rb_custom = {
 
 const struct bench bench_pb_libbpf = {
 	.name = "pb-libbpf",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = perfbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
@@ -551,6 +554,7 @@ const struct bench bench_pb_libbpf = {
 
 const struct bench bench_pb_custom = {
 	.name = "pb-custom",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = perfbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
index 494b591c02897..d3fad2ba6916f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -140,6 +140,7 @@ static void strncmp_measure(struct bench_res *res)
 
 const struct bench bench_strncmp_no_helper = {
 	.name = "strncmp-no-helper",
+	.argp = &bench_strncmp_argp,
 	.validate = strncmp_validate,
 	.setup = strncmp_no_helper_setup,
 	.producer_thread = strncmp_producer,
@@ -151,6 +152,7 @@ const struct bench bench_strncmp_no_helper = {
 
 const struct bench bench_strncmp_helper = {
 	.name = "strncmp-helper",
+	.argp = &bench_strncmp_argp,
 	.validate = strncmp_validate,
 	.setup = strncmp_helper_setup,
 	.producer_thread = strncmp_producer,

From 9644546260eac49348b2c0694b01bdf72c627194 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:16 +0000
Subject: [PATCH 4/7] selftest/bpf/benchs: Remove an unused header

The benchs/bench_bpf_hashmap_full_update.c doesn't set a custom argp,
so it shouldn't include the <argp.h> header.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-5-aspsk@isovalent.com
---
 .../testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index 67f76415a362a..75abe8137b6cf 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2022 Bytedance */
 
-#include <argp.h>
 #include "bench.h"
 #include "bpf_hashmap_full_update_bench.skel.h"
 #include "bpf_util.h"

From 90c22503cd8910c54a8cd4bfe5bb6873d9ba8e0b Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:17 +0000
Subject: [PATCH 5/7] selftest/bpf/benchs: Make quiet option common

The "local-storage-tasks-trace" benchmark has a `--quiet` option. Move it to
the list of common options, so that the main code and other benchmarks can use
(new) env.quiet variable. Patch the run_bench_local_storage_rcu_tasks_trace.sh
helper script accordingly.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-6-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c               |  5 +++++
 tools/testing/selftests/bpf/bench.h               |  1 +
 .../benchs/bench_local_storage_rcu_tasks_trace.c  | 15 +--------------
 .../run_bench_local_storage_rcu_tasks_trace.sh    |  2 +-
 4 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 12c3b3ab84aab..23c24c3461307 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -16,6 +16,7 @@ struct env env = {
 	.warmup_sec = 1,
 	.duration_sec = 5,
 	.affinity = false,
+	.quiet = false,
 	.consumer_cnt = 1,
 	.producer_cnt = 1,
 };
@@ -262,6 +263,7 @@ static const struct argp_option opts[] = {
 	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
 	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
 	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+	{ "quiet", 'q', NULL, 0, "Be more quiet"},
 	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
 	  "Set of CPUs for producer threads; implies --affinity"},
 	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -330,6 +332,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'a':
 		env.affinity = true;
 		break;
+	case 'q':
+		env.quiet = true;
+		break;
 	case ARG_PROD_AFFINITY_SET:
 		env.affinity = true;
 		if (parse_num_list(arg, &env.prod_cpus.cpus,
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 3c8afa0131a3a..402729c6a3ac5 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -24,6 +24,7 @@ struct env {
 	bool verbose;
 	bool list;
 	bool affinity;
+	bool quiet;
 	int consumer_cnt;
 	int producer_cnt;
 	struct cpu_set prod_cpus;
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
index 4f9401ecf09ce..d5eb5587f2aa9 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -12,17 +12,14 @@
 static struct {
 	__u32 nr_procs;
 	__u32 kthread_pid;
-	bool quiet;
 } args = {
 	.nr_procs = 1000,
 	.kthread_pid = 0,
-	.quiet = false,
 };
 
 enum {
 	ARG_NR_PROCS = 7000,
 	ARG_KTHREAD_PID = 7001,
-	ARG_QUIET = 7002,
 };
 
 static const struct argp_option opts[] = {
@@ -30,8 +27,6 @@ static const struct argp_option opts[] = {
 		"Set number of user processes to spin up"},
 	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
 		"Pid of rcu_tasks_trace kthread for ticks tracking"},
-	{ "quiet", ARG_QUIET, "{0,1}", 0,
-		"If true, don't report progress"},
 	{},
 };
 
@@ -56,14 +51,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		}
 		args.kthread_pid = ret;
 		break;
-	case ARG_QUIET:
-		ret = strtol(arg, NULL, 10);
-		if (ret < 0 || ret > 1) {
-			fprintf(stderr, "invalid quiet %ld\n", ret);
-			argp_usage(state);
-		}
-		args.quiet = ret;
-		break;
 break;
 	default:
 		return ARGP_ERR_UNKNOWN;
@@ -230,7 +217,7 @@ static void report_progress(int iter, struct bench_res *res, long delta_ns)
 		exit(1);
 	}
 
-	if (args.quiet)
+	if (env.quiet)
 		return;
 
 	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
index 5dac1f02892cf..3e8a969f20967 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -8,4 +8,4 @@ if [ -z $kthread_pid ]; then
 	exit 1
 fi
 
-./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace

From a237dda05e9101404a634ac53ee65c8f8c8fce58 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:18 +0000
Subject: [PATCH 6/7] selftest/bpf/benchs: Print less if the quiet option is
 set

The bench utility will print

    Setting up benchmark '<bench-name>'...
    Benchmark '<bench-name>' started.

on startup to stdout. Suppress this output if --quiet option if given. This
makes it simpler to parse benchmark output by a script.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-7-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 23c24c3461307..767ca679ee676 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -577,7 +577,8 @@ static void setup_benchmark(void)
 {
 	int i, err;
 
-	printf("Setting up benchmark '%s'...\n", bench->name);
+	if (!env.quiet)
+		printf("Setting up benchmark '%s'...\n", bench->name);
 
 	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
 	state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
@@ -623,7 +624,8 @@ static void setup_benchmark(void)
 					    next_cpu(&env.prod_cpus));
 	}
 
-	printf("Benchmark '%s' started.\n", bench->name);
+	if (!env.quiet)
+		printf("Benchmark '%s' started.\n", bench->name);
 }
 
 static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;

From f371f2dc53d107af25171f29c852a3908ee0afb6 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:19 +0000
Subject: [PATCH 7/7] selftest/bpf/benchs: Add benchmark for hashmap lookups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new benchmark which measures hashmap lookup operations speed.  A user can
control the following parameters of the benchmark:

    * key_size (max 1024): the key size to use
    * max_entries: the hashmap max entries
    * nr_entries: the number of entries to insert/lookup
    * nr_loops: the number of loops for the benchmark
    * map_flags The hashmap flags passed to BPF_MAP_CREATE

The BPF program performing the benchmarks calls two nested bpf_loop:

    bpf_loop(nr_loops/nr_entries)
            bpf_loop(nr_entries)
                     bpf_map_lookup()

So the nr_loops determines the number of actual map lookups. All lookups are
successful.

Example (the output is generated on a AMD Ryzen 9 3950X machine):

    for nr_entries in `seq 4096 4096 65536`; do echo -n "$((nr_entries*100/65536))% full: "; sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=4 --nr_entries=$nr_entries --max_entries=65536 --nr_loops=1000000 --map_flags=0x40 | grep cpu; done
    6% full: cpu01: lookup 50.739M ± 0.018M events/sec (approximated from 32 samples of ~19ms)
    12% full: cpu01: lookup 47.751M ± 0.015M events/sec (approximated from 32 samples of ~20ms)
    18% full: cpu01: lookup 45.153M ± 0.013M events/sec (approximated from 32 samples of ~22ms)
    25% full: cpu01: lookup 43.826M ± 0.014M events/sec (approximated from 32 samples of ~22ms)
    31% full: cpu01: lookup 41.971M ± 0.012M events/sec (approximated from 32 samples of ~23ms)
    37% full: cpu01: lookup 41.034M ± 0.015M events/sec (approximated from 32 samples of ~24ms)
    43% full: cpu01: lookup 39.946M ± 0.012M events/sec (approximated from 32 samples of ~25ms)
    50% full: cpu01: lookup 38.256M ± 0.014M events/sec (approximated from 32 samples of ~26ms)
    56% full: cpu01: lookup 36.580M ± 0.018M events/sec (approximated from 32 samples of ~27ms)
    62% full: cpu01: lookup 36.252M ± 0.012M events/sec (approximated from 32 samples of ~27ms)
    68% full: cpu01: lookup 35.200M ± 0.012M events/sec (approximated from 32 samples of ~28ms)
    75% full: cpu01: lookup 34.061M ± 0.009M events/sec (approximated from 32 samples of ~29ms)
    81% full: cpu01: lookup 34.374M ± 0.010M events/sec (approximated from 32 samples of ~29ms)
    87% full: cpu01: lookup 33.244M ± 0.011M events/sec (approximated from 32 samples of ~30ms)
    93% full: cpu01: lookup 32.182M ± 0.013M events/sec (approximated from 32 samples of ~31ms)
    100% full: cpu01: lookup 31.497M ± 0.016M events/sec (approximated from 32 samples of ~31ms)

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-8-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/Makefile          |   5 +-
 tools/testing/selftests/bpf/bench.c           |   4 +
 .../bpf/benchs/bench_bpf_hashmap_lookup.c     | 283 ++++++++++++++++++
 .../selftests/bpf/progs/bpf_hashmap_lookup.c  |  63 ++++
 4 files changed, 354 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 521933bc15fea..b677dcd0b77af 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -638,6 +638,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
 $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
 $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -652,7 +653,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_strncmp.o \
 		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
 		 $(OUTPUT)/bench_local_storage.o \
-		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
+		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
+		 $(OUTPUT)/bench_bpf_hashmap_lookup.o \
+		 #
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 767ca679ee676..0b2a53bb84609 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -277,6 +277,7 @@ extern struct argp bench_bpf_loop_argp;
 extern struct argp bench_local_storage_argp;
 extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
+extern struct argp bench_hashmap_lookup_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -286,6 +287,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
 	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
 		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
+	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
 	{},
 };
 
@@ -512,6 +514,7 @@ extern const struct bench bench_local_storage_cache_seq_get;
 extern const struct bench bench_local_storage_cache_interleaved_get;
 extern const struct bench bench_local_storage_cache_hashmap_control;
 extern const struct bench bench_local_storage_tasks_trace;
+extern const struct bench bench_bpf_hashmap_lookup;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -551,6 +554,7 @@ static const struct bench *benchs[] = {
 	&bench_local_storage_cache_interleaved_get,
 	&bench_local_storage_cache_hashmap_control,
 	&bench_local_storage_tasks_trace,
+	&bench_bpf_hashmap_lookup,
 };
 
 static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
new file mode 100644
index 0000000000000..8dbb02f75cffc
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <sys/random.h>
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_lookup.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct bpf_hashmap_lookup *skel;
+} ctx;
+
+/* only available to kernel, so define it here */
+#define BPF_MAX_LOOPS (1<<23)
+
+#define MAX_KEY_SIZE 1024 /* the size of the key map */
+
+static struct {
+	__u32 key_size;
+	__u32 map_flags;
+	__u32 max_entries;
+	__u32 nr_entries;
+	__u32 nr_loops;
+} args = {
+	.key_size = 4,
+	.map_flags = 0,
+	.max_entries = 1000,
+	.nr_entries = 500,
+	.nr_loops = 1000000,
+};
+
+enum {
+	ARG_KEY_SIZE = 8001,
+	ARG_MAP_FLAGS,
+	ARG_MAX_ENTRIES,
+	ARG_NR_ENTRIES,
+	ARG_NR_LOOPS,
+};
+
+static const struct argp_option opts[] = {
+	{ "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0,
+	  "The hashmap key size (max 1024)"},
+	{ "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0,
+	  "The hashmap flags passed to BPF_MAP_CREATE"},
+	{ "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0,
+	  "The hashmap max entries"},
+	{ "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+	  "The number of entries to insert/lookup"},
+	{ "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0,
+	  "The number of loops for the benchmark"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	long ret;
+
+	switch (key) {
+	case ARG_KEY_SIZE:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > MAX_KEY_SIZE) {
+			fprintf(stderr, "invalid key_size");
+			argp_usage(state);
+		}
+		args.key_size = ret;
+		break;
+	case ARG_MAP_FLAGS:
+		ret = strtol(arg, NULL, 0);
+		if (ret < 0 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid map_flags");
+			argp_usage(state);
+		}
+		args.map_flags = ret;
+		break;
+	case ARG_MAX_ENTRIES:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid max_entries");
+			argp_usage(state);
+		}
+		args.max_entries = ret;
+		break;
+	case ARG_NR_ENTRIES:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_entries");
+			argp_usage(state);
+		}
+		args.nr_entries = ret;
+		break;
+	case ARG_NR_LOOPS:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > BPF_MAX_LOOPS) {
+			fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n",
+				ret, BPF_MAX_LOOPS);
+			argp_usage(state);
+		}
+		args.nr_loops = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_hashmap_lookup_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.nr_entries > args.max_entries) {
+		fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n",
+			args.max_entries, args.nr_entries);
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	while (true) {
+		/* trigger the bpf program */
+		syscall(__NR_getpgid);
+	}
+	return NULL;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static inline void patch_key(u32 i, u32 *key)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	*key = i + 1;
+#else
+	*key = __builtin_bswap32(i + 1);
+#endif
+	/* the rest of key is random */
+}
+
+static void setup(void)
+{
+	struct bpf_link *link;
+	int map_fd;
+	int ret;
+	int i;
+
+	setup_libbpf();
+
+	ctx.skel = bpf_hashmap_lookup__open();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries);
+	bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size);
+	bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8);
+	bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags);
+
+	ctx.skel->bss->nr_entries = args.nr_entries;
+	ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries;
+
+	if (args.key_size > 4) {
+		for (i = 1; i < args.key_size/4; i++)
+			ctx.skel->bss->key[i] = 2654435761 * i;
+	}
+
+	ret = bpf_hashmap_lookup__load(ctx.skel);
+	if (ret) {
+		bpf_hashmap_lookup__destroy(ctx.skel);
+		fprintf(stderr, "failed to load map: %s", strerror(-ret));
+		exit(1);
+	}
+
+	/* fill in the hash_map */
+	map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+	for (u64 i = 0; i < args.nr_entries; i++) {
+		patch_key(i, ctx.skel->bss->key);
+		bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY);
+	}
+
+	link = bpf_program__attach(ctx.skel->progs.benchmark);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static inline double events_from_time(u64 time)
+{
+	if (time)
+		return args.nr_loops * 1000000000llu / time / 1000000.0L;
+
+	return 0;
+}
+
+static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time)
+{
+	int i, n = 0;
+
+	*events_mean = 0;
+	*events_stddev = 0;
+	*mean_time = 0;
+
+	for (i = 0; i < 32; i++) {
+		if (!times[i])
+			break;
+		*mean_time += times[i];
+		*events_mean += events_from_time(times[i]);
+		n += 1;
+	}
+	if (!n)
+		return 0;
+
+	*mean_time /= n;
+	*events_mean /= n;
+
+	if (n > 1) {
+		for (i = 0; i < n; i++) {
+			double events_i = *events_mean - events_from_time(times[i]);
+			*events_stddev += events_i * events_i / (n - 1);
+		}
+		*events_stddev = sqrt(*events_stddev);
+	}
+
+	return n;
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	double events_mean, events_stddev;
+	u64 mean_time;
+	int i, n;
+
+	for (i = 0; i < nr_cpus; i++) {
+		n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean,
+				   &events_stddev, &mean_time);
+		if (n == 0)
+			continue;
+
+		if (env.quiet) {
+			/* we expect only one cpu to be present */
+			if (env.affinity)
+				printf("%.3lf\n", events_mean);
+			else
+				printf("cpu%02d %.3lf\n", i, events_mean);
+		} else {
+			printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec"
+			       " (approximated from %d samples of ~%lums)\n",
+			       i, events_mean, 2*events_stddev,
+			       n, mean_time / 1000000);
+		}
+	}
+}
+
+const struct bench bench_bpf_hashmap_lookup = {
+	.name = "bpf-hashmap-lookup",
+	.argp = &bench_hashmap_lookup_argp,
+	.validate = validate,
+	.setup = setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = NULL,
+	.report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
new file mode 100644
index 0000000000000..1eb74ddca4144
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+} hash_map_bench SEC(".maps");
+
+/* The number of slots to store times */
+#define NR_SLOTS 32
+#define NR_CPUS 256
+#define CPU_MASK (NR_CPUS-1)
+
+/* Configured by userspace */
+u64 nr_entries;
+u64 nr_loops;
+u32 __attribute__((__aligned__(8))) key[NR_CPUS];
+
+/* Filled by us */
+u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
+u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];
+
+static inline void patch_key(u32 i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	key[0] = i + 1;
+#else
+	key[0] = __builtin_bswap32(i + 1);
+#endif
+	/* the rest of key is random and is configured by userspace */
+}
+
+static int lookup_callback(__u32 index, u32 *unused)
+{
+	patch_key(index);
+	return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1;
+}
+
+static int loop_lookup_callback(__u32 index, u32 *unused)
+{
+	return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+	u32 cpu = bpf_get_smp_processor_id();
+	u32 times_index;
+	u64 start_time;
+
+	times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS;
+	start_time = bpf_ktime_get_ns();
+	bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);
+	percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time;
+	percpu_times_index[cpu & CPU_MASK] += 1;
+	return 0;
+}