From f96afa767baffba7645f5e10998f5178948bb9aa Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Nov 2018 12:28:45 +0000
Subject: [PATCH 01/71] selftests/bpf: enable (uncomment) all tests in
 test_libbpf.sh

libbpf is now able to load successfully test_l4lb_noinline.o and
samples/bpf/tracex3_kern.o.

For the test_l4lb_noinline, uncomment related tests from test_libbpf.sh
and remove the associated "TODO".

For tracex3_kern.o, instead of loading a program from samples/bpf/ that
might not have been compiled at this stage, try loading a program from
BPF selftests. Since this test case is about loading a program compiled
without the "-target bpf" flag, change the Makefile to compile one
program accordingly (instead of passing the flag for compiling all
programs).

Regarding test_xdp_meta.o: in its current shape the program fails to
load because it provides no version section, but the loader needs one.
The test was added to make sure that libbpf could load XDP programs even
if they do not provide a version number in a dedicated section. But
libbpf is already capable of doing that: in our case loading fails
because the loader does not know that this is an XDP program (it does
not need to, since it does not attach the program). So trying to load
test_xdp_meta.o does not bring much here: just delete this subtest.

For the record, the error message obtained with tracex3_kern.o was
fixed by commit e3d91b0ca523 ("tools/libbpf: handle issues with bpf ELF
objects containing .eh_frames")

I have not been able to reproduce the "libbpf: incorrect bpf_call
opcode" error for test_l4lb_noinline.o, even with the version of libbpf
present at the time when test_libbpf.sh and test_libbpf_open.c were
created.

RFC -> v1:
- Compile test_xdp without the "-target bpf" flag, and try to load it
  instead of ../../samples/bpf/tracex3_kern.o.
- Delete test_xdp_meta.o subtest.

Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile       | 10 ++++++++++
 tools/testing/selftests/bpf/test_libbpf.sh | 14 ++++----------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e39dfb4e7970f..ecd79b7fb1073 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -135,6 +135,16 @@ endif
 endif
 endif
 
+# Have one program compiled without "-target bpf" to test whether libbpf loads
+# it successfully
+$(OUTPUT)/test_xdp.o: test_xdp.c
+	$(CLANG) $(CLANG_FLAGS) \
+		-O2 -emit-llvm -c $< -o - | \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
+
 $(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
index 156d89f1edcc4..2989b2e2d856d 100755
--- a/tools/testing/selftests/bpf/test_libbpf.sh
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9
 
 libbpf_open_file test_l4lb.o
 
-# TODO: fix libbpf to load noinline functions
-# [warning] libbpf: incorrect bpf_call opcode
-#libbpf_open_file test_l4lb_noinline.o
+# Load a program with BPF-to-BPF calls
+libbpf_open_file test_l4lb_noinline.o
 
-# TODO: fix test_xdp_meta.c to load with libbpf
-# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version
-#libbpf_open_file test_xdp_meta.o
-
-# TODO: fix libbpf to handle .eh_frame
-# [warning] libbpf: relocation failed: no section(10)
-#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o
+# Load a program compiled without the "-target bpf" flag
+libbpf_open_file test_xdp.o
 
 # Success
 exit 0

From 8302b9bd31d29f29dd24dd6b1e1e5682c302c11c Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Nov 2018 12:29:30 +0000
Subject: [PATCH 02/71] tools: bpftool: adjust rlimit RLIMIT_MEMLOCK when
 loading programs, maps

The limit for memory locked in the kernel by a process is usually set to
64 kbytes by default. This can be an issue when creating large BPF maps
and/or loading many programs. A workaround is to raise this limit for
the current process before trying to create a new BPF map. Changing the
hard limit requires the CAP_SYS_RESOURCE and can usually only be done by
root user (for non-root users, a call to setrlimit fails (and sets
errno) and the program simply goes on with its rlimit unchanged).

There is no API to get the current amount of memory locked for a user,
therefore we cannot raise the limit only when required. One solution,
used by bcc, is to try to create the map, and on getting an EPERM error,
raising the limit to infinity before giving another try. Another
approach, used in iproute2, is to raise the limit in all cases, before
trying to create the map.

Here we do the same as in iproute2: the rlimit is raised to infinity
before trying to load programs or to create maps with bpftool.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c | 8 ++++++++
 tools/bpf/bpftool/main.h   | 2 ++
 tools/bpf/bpftool/map.c    | 2 ++
 tools/bpf/bpftool/prog.c   | 2 ++
 4 files changed, 14 insertions(+)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 25af85304ebee..1149565be4b16 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -46,6 +46,7 @@
 #include <linux/magic.h>
 #include <net/if.h>
 #include <sys/mount.h>
+#include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -99,6 +100,13 @@ static bool is_bpffs(char *path)
 	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
 }
 
+void set_max_rlimit(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
 static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
 {
 	bool bind_done = false;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 28322ace28565..14857c273bf67 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -100,6 +100,8 @@ bool is_prefix(const char *pfx, const char *str);
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
 void usage(void) __noreturn;
 
+void set_max_rlimit(void);
+
 struct pinned_obj_table {
 	DECLARE_HASHTABLE(table, 16);
 };
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 7bf38f0e152e0..101b8a8812259 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1140,6 +1140,8 @@ static int do_create(int argc, char **argv)
 		return -1;
 	}
 
+	set_max_rlimit();
+
 	fd = bpf_create_map_xattr(&attr);
 	if (fd < 0) {
 		p_err("map create failed: %s", strerror(errno));
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5302ee282409e..b9b84553bec44 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -995,6 +995,8 @@ static int do_load(int argc, char **argv)
 		goto err_close_obj;
 	}
 
+	set_max_rlimit();
+
 	err = bpf_object__load(obj);
 	if (err) {
 		p_err("failed to load object file");

From bce6a14996f991e570d973179b5ff57544efaa9a Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Mon, 29 Oct 2018 14:14:41 -0700
Subject: [PATCH 03/71] bpf_load: add map name to load_maps error message

To help when debugging bpf/xdp load issues, have the load_map()
error message include the number and name of the map that
failed.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index e6d7e0fe155b4..5c052b9ea63fe 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -284,8 +284,8 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
 							numa_node);
 		}
 		if (map_fd[i] < 0) {
-			printf("failed to create a map: %d %s\n",
-			       errno, strerror(errno));
+			printf("failed to create map %d (%s): %d %s\n",
+			       i, maps[i].name, errno, strerror(errno));
 			return 1;
 		}
 		maps[i].fd = map_fd[i];

From e647815a4d3b3be9d85b5750ed0f2947fd78fac7 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 8 Nov 2018 04:08:42 -0500
Subject: [PATCH 04/71] bpf: let verifier to calculate and record
 max_pkt_offset

In check_packet_access, update max_pkt_offset after the offset has passed
__check_packet_access.

It should be safe to use u32 for max_pkt_offset as explained in code
comment.

Also, when there is tail call, the max_pkt_offset of the called program is
unknown, so conservatively set max_pkt_offset to MAX_PACKET_OFF for such
case.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/verifier.c | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 33014ae73103f..b6a296e01f6a9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -293,6 +293,7 @@ struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
+	u32 max_pkt_offset;
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1971ca325fb4e..75dab40b19a3e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1455,6 +1455,17 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 		verbose(env, "R%d offset is outside of the packet\n", regno);
 		return err;
 	}
+
+	/* __check_packet_access has made sure "off + size - 1" is within u16.
+	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
+	 * otherwise find_good_pkt_pointers would have refused to set range info
+	 * that __check_packet_access would have rejected this pkt access.
+	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
+	 */
+	env->prog->aux->max_pkt_offset =
+		max_t(u32, env->prog->aux->max_pkt_offset,
+		      off + reg->umax_value + size - 1);
+
 	return err;
 }
 
@@ -6138,6 +6149,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 */
 			prog->cb_access = 1;
 			env->prog->aux->stack_depth = MAX_BPF_STACK;
+			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
 
 			/* mark bpf_tail_call as different opcode to avoid
 			 * conditional branch in the interpeter for every normal

From cf599f50311dd4a5d3b5fc427beb9303dfa6be4b Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 8 Nov 2018 04:08:43 -0500
Subject: [PATCH 05/71] nfp: bpf: relax prog rejection through max_pkt_offset

NFP is refusing to offload programs whenever the MTU is set to a value
larger than the max packet bytes that fits in NFP Cluster Target Memory
(CTM). However, an eBPF program doesn't always need to access the whole
packet data.

Verifier has always calculated maximum direct packet access (DPA) offset,
and kept it in max_pkt_offset inside prog auxiliary information. This patch
relaxes prog rejection based on max_pkt_offset.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/offload.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index ba8ceedcf6a28..07bdc1f61996b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -489,14 +489,15 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
 		 struct netlink_ext_ack *extack)
 {
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-	unsigned int max_mtu, max_stack, max_prog_len;
+	unsigned int fw_mtu, pkt_off, max_stack, max_prog_len;
 	dma_addr_t dma_addr;
 	void *img;
 	int err;
 
-	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
-	if (max_mtu < nn->dp.netdev->mtu) {
-		NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary");
+	fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+	pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu);
+	if (fw_mtu < pkt_off) {
+		NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary");
 		return -EOPNOTSUPP;
 	}
 

From a5a3a828cd00788a78da686c57c6d1f66191d8af Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Wed, 7 Nov 2018 16:12:01 -0800
Subject: [PATCH 06/71] bpf: add perf event notification support for sock_ops

This patch allows eBPF programs that use sock_ops to send perf
based event notifications using bpf_perf_event_output(). Our main
use case for this is the following:

  We would like to monitor some subset of TCP sockets in user-space,
  (the monitoring application would define 4-tuples it wants to monitor)
  using TCP_INFO stats to analyze reported problems. The idea is to
  use those stats to see where the bottlenecks are likely to be ("is
  it application-limited?" or "is there evidence of BufferBloat in
  the path?" etc).

  Today we can do this by periodically polling for tcp_info, but this
  could be made more efficient if the kernel would asynchronously
  notify the application via tcp_info when some "interesting"
  thresholds (e.g., "RTT variance > X", or "total_retrans > Y" etc)
  are reached. And to make this effective, it is better if
  we could apply the threshold check *before* constructing the
  tcp_info netlink notification, so that we don't waste resources
  constructing notifications that will be discarded by the filter.

This work solves the problem by adding perf event based notification
support for sock_ops. The eBPF program can thus be designed to apply
any desired filters to the bpf_sock_ops and trigger a perf event
notification based on the evaluation from the filter. The user space
component can use these perf event notifications to either read any
state managed by the eBPF program, or issue a TCP_INFO netlink call
if desired.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d11..ba97a6bee6f92 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3908,6 +3908,26 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
+	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+		return -EINVAL;
+
+	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto =  {
+	.func		= bpf_sockopt_event_output,
+	.gpl_only       = true,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_PTR_TO_MEM,
+	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+};
+
 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
@@ -5240,6 +5260,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_cookie_sock_ops_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_sockopt_event_output_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 435f90a338ae42c0d7c0109a1742d1b16bc99bf6 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Wed, 7 Nov 2018 16:12:02 -0800
Subject: [PATCH 07/71] selftests/bpf: add a test case for sock_ops perf-event
 notification

This patch provides a tcp_bpf based eBPF sample. The test

- uses ncat(1) as the TCP client program to connect() to a port
  with the intention of triggering SYN retransmissions: we
  first install an iptables DROP rule to make sure ncat SYNs are
  resent (instead of aborting instantly after a TCP RST)

- has a bpf kernel module that sends a perf-event notification for
  each TCP retransmit, and also tracks the number of such notifications
  sent in the global_map

The test passes when the number of event notifications intercepted
in user-space matches the value in the global_map.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile          |   4 +-
 tools/testing/selftests/bpf/test_tcpnotify.h  |  19 ++
 .../selftests/bpf/test_tcpnotify_kern.c       |  95 +++++++++
 .../selftests/bpf/test_tcpnotify_user.c       | 186 ++++++++++++++++++
 4 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_tcpnotify.h
 create mode 100644 tools/testing/selftests/bpf/test_tcpnotify_kern.c
 create mode 100644 tools/testing/selftests/bpf/test_tcpnotify_user.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ecd79b7fb1073..57b4712a62762 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -24,12 +24,13 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt
+	test_netcnt test_tcpnotify_user
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+	test_tcpnotify_kern.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
 	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
 	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
@@ -74,6 +75,7 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
 $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
+$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/test_progs: trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/test_tcpnotify.h b/tools/testing/selftests/bpf/test_tcpnotify.h
new file mode 100644
index 0000000000000..8b6cea030bfc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpnotify.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _TEST_TCPNOTIFY_H
+#define _TEST_TCPNOTIFY_H
+
+struct tcpnotify_globals {	/* value type of global_map */
+	__u32 total_retrans;	/* copy of skops->total_retrans */
+	__u32 ncalls;		/* RETRANS_CB callbacks counted by the prog */
+};
+
+struct tcp_notifier {		/* payload of each perf event sent */
+	__u8    type;
+	__u8    subtype;
+	__u8    source;
+	__u8    hash;
+};
+
+#define	TESTPORT	12877	/* remote port the sock_ops prog acts on */
+#endif /* _TEST_TCPNOTIFY_H */
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/test_tcpnotify_kern.c
new file mode 100644
index 0000000000000..edbca203ce2dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpnotify_kern.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpnotify.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct tcpnotify_globals),
+	.max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") perf_event_map = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(__u32),
+	.max_entries = 2,
+};
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	op = (int) skops->op;
+
+	if (bpf_ntohl(skops->remote_port) != TESTPORT) {
+		skops->reply = -1;
+		return 0;
+	}
+
+	switch (op) {
+	case BPF_SOCK_OPS_TIMEOUT_INIT:
+	case BPF_SOCK_OPS_RWND_INIT:
+	case BPF_SOCK_OPS_NEEDS_ECN:
+	case BPF_SOCK_OPS_BASE_RTT:
+	case BPF_SOCK_OPS_RTO_CB:
+		rv = 1;
+		break;
+
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG|
+					  BPF_SOCK_OPS_RTO_CB_FLAG));
+		rv = 1;
+		break;
+	case BPF_SOCK_OPS_RETRANS_CB: {
+			__u32 key = 0;
+			struct tcpnotify_globals g, *gp;
+			struct tcp_notifier msg = {
+				.type = 0xde,
+				.subtype = 0xad,
+				.source = 0xbe,
+				.hash = 0xef,
+			};
+
+			rv = 1;
+
+			/* Update results */
+			gp = bpf_map_lookup_elem(&global_map, &key);
+			if (!gp)
+				break;
+			g = *gp;
+			g.total_retrans = skops->total_retrans;
+			g.ncalls++;
+			bpf_map_update_elem(&global_map, &key, &g,
+					    BPF_ANY);
+			bpf_perf_event_output(skops, &perf_event_map,
+					      BPF_F_CURRENT_CPU,
+					      &msg, sizeof(msg));
+		}
+		break;
+	default:
+		rv = -1;
+	}
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
new file mode 100644
index 0000000000000..ff3c4522aed68
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <asm/types.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <sys/ioctl.h>
+#include <linux/rtnetlink.h>
+#include <signal.h>
+#include <linux/perf_event.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#include "test_tcpnotify.h"
+#include "trace_helpers.h"
+
+#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
+
+pthread_t tid;
+int rx_callbacks;
+
+static int dummyfn(void *data, int size)
+{
+	struct tcp_notifier *t = data;
+
+	if (t->type != 0xde || t->subtype != 0xad ||
+	    t->source != 0xbe || t->hash != 0xef)
+		return 1;
+	rx_callbacks++;
+	return 0;
+}
+
+void tcp_notifier_poller(int fd)
+{
+	while (1)
+		perf_event_poller(fd, dummyfn);
+}
+
+static void *poller_thread(void *arg)
+{
+	int fd = *(int *)arg;
+
+	tcp_notifier_poller(fd);
+	return arg;
+}
+
+int verify_result(const struct tcpnotify_globals *result)
+{
+	return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1);
+}
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+static int setup_bpf_perf_event(int mapfd)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+	};
+	int key = 0;
+	int pmu_fd;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
+	if (pmu_fd < 0)
+		return pmu_fd;
+	bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY);
+
+	ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	return pmu_fd;
+}
+
+/* Check that perf events seen in user space match ncalls in global_map. */
+int main(int argc, char **argv)
+{
+	const char *file = "test_tcpnotify_kern.o";
+	int prog_fd, map_fd, perf_event_fd, pmu_fd, rv;
+	struct tcpnotify_globals g = {0};
+	int error = EXIT_FAILURE, cg_fd = -1;
+	const char *cg_path = "/foo";
+	struct bpf_object *obj;
+	char test_script[80];
+	cpu_set_t cpuset;
+	__u32 key = 0;
+
+	/* Stay on CPU 0: the perf event below is opened for CPU 0 only. */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cg_fd = create_and_get_cgroup(cg_path);
+	if (!cg_fd)
+		goto err;
+
+	if (join_cgroup(cg_path))
+		goto err;
+
+	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+		printf("FAILED: load_bpf_file failed for: %s\n", file);
+		goto err;
+	}
+
+	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       rv, strerror(errno));
+		goto err;
+	}
+
+	perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map");
+	if (perf_event_fd < 0)
+		goto err;
+
+	map_fd = bpf_find_map(__func__, obj, "global_map");
+	if (map_fd < 0)
+		goto err;
+
+	pmu_fd = setup_bpf_perf_event(perf_event_fd);
+	if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0)
+		goto err;
+
+	pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd);
+
+	/* Drop packets to TESTPORT so the nc connect below must retransmit. */
+	snprintf(test_script, sizeof(test_script),
+		 "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP",
+		 TESTPORT);
+	system(test_script);
+
+	snprintf(test_script, sizeof(test_script),
+		 "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
+		 TESTPORT);
+	system(test_script);
+
+	snprintf(test_script, sizeof(test_script),
+		 "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP",
+		 TESTPORT);
+	system(test_script);
+
+	rv = bpf_map_lookup_elem(map_fd, &key, &g);
+	if (rv != 0) {
+		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+		goto err;
+	}
+
+	sleep(10);
+
+	if (verify_result(&g)) {
+		printf("FAILED: Wrong stats Expected %d calls, got %d\n",
+			g.ncalls, rx_callbacks);
+		goto err;
+	}
+
+	printf("PASSED!\n");
+	error = 0;
+err:
+	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	close(cg_fd);
+	cleanup_cgroup_environment();
+	return error;
+}

From bf598a8f0f771302d4ecb0ef0003c54732221597 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 8 Nov 2018 13:00:07 -0800
Subject: [PATCH 08/71] bpftool: Improve handling of ENOENT on map dumps

bpftool output is not user friendly when dumping a map with only a few
populated entries:

    $ bpftool map
    1: devmap  name tx_devmap  flags 0x0
            key 4B  value 4B  max_entries 64  memlock 4096B
    2: array  name tx_idxmap  flags 0x0
            key 4B  value 4B  max_entries 64  memlock 4096B

    $ bpftool map dump id 1
    key:
    00 00 00 00
    value:
    No such file or directory
    key:
    01 00 00 00
    value:
    No such file or directory
    key:
    02 00 00 00
    value:
    No such file or directory
    key: 03 00 00 00  value: 03 00 00 00

Handle ENOENT by keeping the line format sane and dumping
"<no entry>" for the value

    $ bpftool map dump id 1
    key: 00 00 00 00  value: <no entry>
    key: 01 00 00 00  value: <no entry>
    key: 02 00 00 00  value: <no entry>
    key: 03 00 00 00  value: 03 00 00 00
    ...

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 101b8a8812259..dc9a8967ab8ca 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -383,7 +383,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 		printf(single_line ? "  " : "\n");
 
 		printf("value:%c", break_names ? '\n' : ' ');
-		fprint_hex(stdout, value, info->value_size, " ");
+		if (value)
+			fprint_hex(stdout, value, info->value_size, " ");
+		else
+			printf("<no entry>");
 
 		printf("\n");
 	} else {
@@ -398,8 +401,11 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 		for (i = 0; i < n; i++) {
 			printf("value (CPU %02d):%c",
 			       i, info->value_size > 16 ? '\n' : ' ');
-			fprint_hex(stdout, value + i * step,
-				   info->value_size, " ");
+			if (value)
+				fprint_hex(stdout, value + i * step,
+					   info->value_size, " ");
+			else
+				printf("<no entry>");
 			printf("\n");
 		}
 	}
@@ -731,7 +737,11 @@ static int dump_map_elem(int fd, void *key, void *value,
 		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
 		jsonw_end_object(json_wtr);
 	} else {
-		print_entry_error(map_info, key, strerror(lookup_errno));
+		if (lookup_errno == ENOENT)
+			print_entry_plain(map_info, key, NULL);
+		else
+			print_entry_error(map_info, key,
+					  strerror(lookup_errno));
 	}
 
 	return 0;

From c8123ead13a5c92dc5fd15c0fdfe88eef41e6ac1 Mon Sep 17 00:00:00 2001
From: Nitin Hande <nitin.hande@gmail.com>
Date: Sun, 28 Oct 2018 21:02:45 -0700
Subject: [PATCH 09/71] bpf: Extend the sk_lookup() helper to XDP hookpoint.

This patch proposes to extend the sk_lookup() BPF API to the
XDP hookpoint. The sk_lookup() helper supports a lookup
on incoming packet to find the corresponding socket that will
receive this packet. Current support for this BPF API is
at the tc hookpoint. This patch will extend this API at XDP
hookpoint. An XDP program can map the incoming packet to the
5-tuple parameter and invoke the API to find the corresponding
socket structure.

Signed-off-by: Nitin Hande <Nitin.Hande@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |   4 ++
 net/core/filter.c        | 105 ++++++++++++++++++++++++++++++++-------
 2 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a0..47d606d744cc6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2201,6 +2201,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
  * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
  *	Description
@@ -2233,6 +2235,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description
diff --git a/net/core/filter.c b/net/core/filter.c
index ba97a6bee6f92..53d50fb75ea18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4845,38 +4845,32 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 
 #ifdef CONFIG_INET
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
-			      struct sk_buff *skb, u8 family, u8 proto)
+			      int dif, int sdif, u8 family, u8 proto)
 {
 	bool refcounted = false;
 	struct sock *sk = NULL;
-	int dif = 0;
-
-	if (skb->dev)
-		dif = skb->dev->ifindex;
 
 	if (family == AF_INET) {
 		__be32 src4 = tuple->ipv4.saddr;
 		__be32 dst4 = tuple->ipv4.daddr;
-		int sdif = inet_sdif(skb);
 
 		if (proto == IPPROTO_TCP)
-			sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+			sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
 					   src4, tuple->ipv4.sport,
 					   dst4, tuple->ipv4.dport,
 					   dif, sdif, &refcounted);
 		else
 			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
 					       dst4, tuple->ipv4.dport,
-					       dif, sdif, &udp_table, skb);
+					       dif, sdif, &udp_table, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
 		u16 hnum = ntohs(tuple->ipv6.dport);
-		int sdif = inet6_sdif(skb);
 
 		if (proto == IPPROTO_TCP)
-			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
 					    src6, tuple->ipv6.sport,
 					    dst6, hnum,
 					    dif, sdif, &refcounted);
@@ -4885,7 +4879,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 							    src6, tuple->ipv6.sport,
 							    dst6, hnum,
 							    dif, sdif,
-							    &udp_table, skb);
+							    &udp_table, NULL);
 #endif
 	}
 
@@ -4902,31 +4896,33 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
  * callers to satisfy BPF_CALL declarations.
  */
 static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
-	      u8 proto, u64 netns_id, u64 flags)
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+		struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+		u64 flags)
 {
-	struct net *caller_net;
 	struct sock *sk = NULL;
 	u8 family = AF_UNSPEC;
 	struct net *net;
+	int sdif;
 
 	family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
 	if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
 		goto out;
 
-	if (skb->dev)
-		caller_net = dev_net(skb->dev);
+	if (family == AF_INET)
+		sdif = inet_sdif(skb);
 	else
-		caller_net = sock_net(skb->sk);
+		sdif = inet6_sdif(skb);
+
 	if (netns_id) {
 		net = get_net_ns_by_id(caller_net, netns_id);
 		if (unlikely(!net))
 			goto out;
-		sk = sk_lookup(net, tuple, skb, family, proto);
+		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
 		put_net(net);
 	} else {
 		net = caller_net;
-		sk = sk_lookup(net, tuple, skb, family, proto);
+		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
 	}
 
 	if (sk)
@@ -4935,6 +4931,25 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
 	return (unsigned long) sk;
 }
 
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+	      u8 proto, u64 netns_id, u64 flags)
+{
+	struct net *caller_net;
+	int ifindex;
+
+	if (skb->dev) {
+		caller_net = dev_net(skb->dev);
+		ifindex = skb->dev->ifindex;
+	} else {
+		caller_net = sock_net(skb->sk);
+		ifindex = 0;
+	}
+
+	return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
+			      proto, netns_id, flags);
+}
+
 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
@@ -4984,6 +4999,50 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_SOCKET,
 };
+
+BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	int ifindex = ctx->rxq->dev->ifindex;
+
+	return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+			      IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+	.func           = bpf_xdp_sk_lookup_udp,
+	.gpl_only       = false,
+	.pkt_access     = true,
+	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type      = ARG_ANYTHING,
+	.arg5_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+	struct net *caller_net = dev_net(ctx->rxq->dev);
+	int ifindex = ctx->rxq->dev->ifindex;
+
+	return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+			      IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+	.func           = bpf_xdp_sk_lookup_tcp,
+	.gpl_only       = false,
+	.pkt_access     = true,
+	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type      = ARG_ANYTHING,
+	.arg5_type      = ARG_ANYTHING,
+};
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5234,6 +5293,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_xdp_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_xdp_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 1da6f5733853fb230265fe1a7d1b5373f13bf5ca Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:24 +0000
Subject: [PATCH 10/71] nfp: bpf: move nfp_bpf_analyzer_ops from verifier.c to
 offload.c

We are about to add several new callbacks to the struct, all of them
defined in offload.c. Move the struct bpf_prog_offload_ops object to
that file. As a consequence, nfp_verify_insn() and nfp_bpf_finalize()
can no longer be static.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |  4 ++++
 drivers/net/ethernet/netronome/nfp/bpf/offload.c  |  5 +++++
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 11 +++--------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 7f591d71ab28d..abdd93d144396 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -509,6 +509,10 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
 int nfp_bpf_jit(struct nfp_prog *prog);
 bool nfp_bpf_supported_opcode(u8 code);
 
+int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
+		    int prev_insn_idx);
+int nfp_bpf_finalize(struct bpf_verifier_env *env);
+
 extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
 
 struct netdev_bpf;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 07bdc1f61996b..dc548bb4089e2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -601,3 +601,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
 
 	return 0;
 }
+
+const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
+	.insn_hook	= nfp_verify_insn,
+	.finalize	= nfp_bpf_finalize,
+};
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 99f977bfd8ccd..337bb862ec1dd 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -623,8 +623,8 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return 0;
 }
 
-static int
-nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
+		    int prev_insn_idx)
 {
 	struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
 	struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
@@ -745,7 +745,7 @@ nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog, unsigned int cnt)
 	goto continue_subprog;
 }
 
-static int nfp_bpf_finalize(struct bpf_verifier_env *env)
+int nfp_bpf_finalize(struct bpf_verifier_env *env)
 {
 	struct bpf_subprog_info *info;
 	struct nfp_prog *nfp_prog;
@@ -788,8 +788,3 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
 
 	return 0;
 }
-
-const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
-	.insn_hook	= nfp_verify_insn,
-	.finalize	= nfp_bpf_finalize,
-};

From 1385d755cfb42f596ef1cf9f5c761010ff3b34e7 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:25 +0000
Subject: [PATCH 11/71] bpf: pass a struct with offload callbacks to
 bpf_offload_dev_create()

For passing device functions for offloaded eBPF programs, there used to
be no place to store the pointer without making the non-offloaded
programs pay a memory price.

As a consequence, three functions were called with ndo_bpf() through
specific commands. Now that we have struct bpf_offload_dev, and since
none of those operations rely on RTNL, we can turn these three commands
into hooks inside the struct bpf_prog_offload_ops, and pass them as part
of bpf_offload_dev_create().

This commit effectively passes a pointer to the struct to
bpf_offload_dev_create(). We temporarily have two struct
bpf_prog_offload_ops instances, one under offdev->ops and one under
offload->dev_ops. The next patches will make the transition towards the
former, so that offload->dev_ops can be removed, and callbacks relying
on ndo_bpf() added to offdev->ops as well.

While at it, rename "nfp_bpf_analyzer_ops" as "nfp_bpf_dev_ops" (and
similarly for netdevsim).

Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c    | 2 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.h    | 2 +-
 drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 ++--
 drivers/net/netdevsim/bpf.c                      | 6 +++---
 include/linux/bpf.h                              | 3 ++-
 kernel/bpf/offload.c                             | 5 ++++-
 6 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 6243af0ab0255..dccae03192045 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app)
 		app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
 	}
 
-	bpf->bpf_dev = bpf_offload_dev_create();
+	bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops);
 	err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
 	if (err)
 		goto err_free_neutral_maps;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index abdd93d144396..941277936475c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -513,7 +513,7 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
 		    int prev_insn_idx);
 int nfp_bpf_finalize(struct bpf_verifier_env *env);
 
-extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
+extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops;
 
 struct netdev_bpf;
 struct nfp_app;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index dc548bb4089e2..2fca996a7e775 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -209,7 +209,7 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
 		goto err_free;
 
 	nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog);
-	bpf->verifier.ops = &nfp_bpf_analyzer_ops;
+	bpf->verifier.ops = &nfp_bpf_dev_ops;
 
 	return 0;
 
@@ -602,7 +602,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
 	return 0;
 }
 
-const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
+const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
 	.insn_hook	= nfp_verify_insn,
 	.finalize	= nfp_bpf_finalize,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index cb3518474f0e4..135aee8641628 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -91,7 +91,7 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
+static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
 	.insn_hook	= nsim_bpf_verify_insn,
 	.finalize	= nsim_bpf_finalize,
 };
@@ -547,7 +547,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		if (err)
 			return err;
 
-		bpf->verifier.ops = &nsim_bpf_analyzer_ops;
+		bpf->verifier.ops = &nsim_bpf_dev_ops;
 		return 0;
 	case BPF_OFFLOAD_TRANSLATE:
 		state = bpf->offload.prog->aux->offload->dev_priv;
@@ -599,7 +599,7 @@ int nsim_bpf_init(struct netdevsim *ns)
 		if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
 			return -ENOMEM;
 
-		ns->sdev->bpf_dev = bpf_offload_dev_create();
+		ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops);
 		err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
 		if (err)
 			return err;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b6a296e01f6a9..c0197c37b2b2d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -692,7 +692,8 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
-struct bpf_offload_dev *bpf_offload_dev_create(void);
+struct bpf_offload_dev *
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops);
 void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
 int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
 				    struct net_device *netdev);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8e93c47f07796..d513fbf9ca531 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -33,6 +33,7 @@
 static DECLARE_RWSEM(bpf_devs_lock);
 
 struct bpf_offload_dev {
+	const struct bpf_prog_offload_ops *ops;
 	struct list_head netdevs;
 };
 
@@ -655,7 +656,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
 
-struct bpf_offload_dev *bpf_offload_dev_create(void)
+struct bpf_offload_dev *
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
 {
 	struct bpf_offload_dev *offdev;
 	int err;
@@ -673,6 +675,7 @@ struct bpf_offload_dev *bpf_offload_dev_create(void)
 	if (!offdev)
 		return ERR_PTR(-ENOMEM);
 
+	offdev->ops = ops;
 	INIT_LIST_HEAD(&offdev->netdevs);
 
 	return offdev;

From 341b3e7b7b89315c43d262da3199098bcf9bbe57 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:26 +0000
Subject: [PATCH 12/71] bpf: call verify_insn from its callback in struct
 bpf_offload_dev

We intend to remove the dev_ops in struct bpf_prog_offload, and to only
keep the ops in struct bpf_offload_dev instead, which is accessible from
more locations for passing function pointers.

But dev_ops is used for calling the verify_insn hook. Switch to the
newly added ops in struct bpf_offload_dev instead.

To avoid table lookups for each eBPF instruction to verify, we remember
the offdev attached to a netdev in struct bpf_prog_offload, setting it
in bpf_prog_offload_init(), so that the lookup is not performed more
than once for a given offload object.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  | 1 +
 kernel/bpf/offload.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c0197c37b2b2d..672714cd904ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -273,6 +273,7 @@ struct bpf_prog_offload_ops {
 struct bpf_prog_offload {
 	struct bpf_prog		*prog;
 	struct net_device	*netdev;
+	struct bpf_offload_dev	*offdev;
 	void			*dev_priv;
 	struct list_head	offloads;
 	bool			dev_state;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d513fbf9ca531..2cd3c0d0417b0 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -107,6 +107,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		err = -EINVAL;
 		goto err_unlock;
 	}
+	offload->offdev = ondev->offdev;
 	prog->aux->offload = offload;
 	list_add_tail(&offload->offloads, &ondev->progs);
 	dev_put(offload->netdev);
@@ -167,7 +168,8 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 	down_read(&bpf_devs_lock);
 	offload = env->prog->aux->offload;
 	if (offload)
-		ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
+		ret = offload->offdev->ops->insn_hook(env, insn_idx,
+						      prev_insn_idx);
 	up_read(&bpf_devs_lock);
 
 	return ret;

From 6dc18fa6f4cad69c892d6fb9499f7e41c6a88a8e Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:27 +0000
Subject: [PATCH 13/71] bpf: call finalize() from its callback in struct
 bpf_offload_dev

In a way similar to the change previously brought to the verify_insn
hook, switch to the newly added ops in struct bpf_offload_dev for
calling the functions used to perform final verification steps for
offloaded programs.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/offload.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2cd3c0d0417b0..2c88cb4ddfd85 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -183,8 +183,8 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
 	down_read(&bpf_devs_lock);
 	offload = env->prog->aux->offload;
 	if (offload) {
-		if (offload->dev_ops->finalize)
-			ret = offload->dev_ops->finalize(env);
+		if (offload->offdev->ops->finalize)
+			ret = offload->offdev->ops->finalize(env);
 		else
 			ret = 0;
 	}

From 00db12c3d141356a4d1e6b6f688e0d5ed3b1f757 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:28 +0000
Subject: [PATCH 14/71] bpf: call verifier_prep from its callback in struct
 bpf_offload_dev

In a way similar to the change previously brought to the verify_insn
hook and to the finalize callback, switch to the newly added ops in
struct bpf_offload_dev for calling the functions used to prepare driver
verifiers.

Since the dev_ops pointer in struct bpf_prog_offload is no longer used
by any callback, we can now remove it from struct bpf_prog_offload.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../net/ethernet/netronome/nfp/bpf/offload.c  | 11 +++----
 drivers/net/netdevsim/bpf.c                   | 32 ++++++++++---------
 include/linux/bpf.h                           |  2 +-
 include/linux/netdevice.h                     |  6 ----
 kernel/bpf/offload.c                          | 22 ++++++-------
 5 files changed, 32 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 2fca996a7e775..16a3a9c55852f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -188,10 +188,11 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
 }
 
 static int
-nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
-		      struct netdev_bpf *bpf)
+nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env)
 {
-	struct bpf_prog *prog = bpf->verifier.prog;
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct bpf_prog *prog = env->prog;
+	struct nfp_app *app = nn->app;
 	struct nfp_prog *nfp_prog;
 	int ret;
 
@@ -209,7 +210,6 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
 		goto err_free;
 
 	nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog);
-	bpf->verifier.ops = &nfp_bpf_dev_ops;
 
 	return 0;
 
@@ -422,8 +422,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 {
 	switch (bpf->command) {
-	case BPF_OFFLOAD_VERIFIER_PREP:
-		return nfp_bpf_verifier_prep(app, nn, bpf);
 	case BPF_OFFLOAD_TRANSLATE:
 		return nfp_bpf_translate(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_DESTROY:
@@ -605,4 +603,5 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
 const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
 	.insn_hook	= nfp_verify_insn,
 	.finalize	= nfp_bpf_finalize,
+	.prepare	= nfp_bpf_verifier_prep,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 135aee8641628..d045b7d666d98 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -91,11 +91,6 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
-	.insn_hook	= nsim_bpf_verify_insn,
-	.finalize	= nsim_bpf_finalize,
-};
-
 static bool nsim_xdp_offload_active(struct netdevsim *ns)
 {
 	return ns->xdp_hw.prog;
@@ -263,6 +258,17 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 	return 0;
 }
 
+static int
+nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env)
+{
+	struct netdevsim *ns = netdev_priv(dev);
+
+	if (!ns->bpf_bind_accept)
+		return -EOPNOTSUPP;
+
+	return nsim_bpf_create_prog(ns, env->prog);
+}
+
 static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state;
@@ -275,6 +281,12 @@ static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
 	kfree(state);
 }
 
+static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
+	.insn_hook	= nsim_bpf_verify_insn,
+	.finalize	= nsim_bpf_finalize,
+	.prepare	= nsim_bpf_verifier_prep,
+};
+
 static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
 {
 	if (bpf->prog && bpf->prog->aux->offload) {
@@ -539,16 +551,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	ASSERT_RTNL();
 
 	switch (bpf->command) {
-	case BPF_OFFLOAD_VERIFIER_PREP:
-		if (!ns->bpf_bind_accept)
-			return -EOPNOTSUPP;
-
-		err = nsim_bpf_create_prog(ns, bpf->verifier.prog);
-		if (err)
-			return err;
-
-		bpf->verifier.ops = &nsim_bpf_dev_ops;
-		return 0;
 	case BPF_OFFLOAD_TRANSLATE:
 		state = bpf->offload.prog->aux->offload->dev_priv;
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 672714cd904ff..f250494a4f56e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,6 +268,7 @@ struct bpf_prog_offload_ops {
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
 	int (*finalize)(struct bpf_verifier_env *env);
+	int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env);
 };
 
 struct bpf_prog_offload {
@@ -277,7 +278,6 @@ struct bpf_prog_offload {
 	void			*dev_priv;
 	struct list_head	offloads;
 	bool			dev_state;
-	const struct bpf_prog_offload_ops *dev_ops;
 	void			*jited_image;
 	u32			jited_len;
 };
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 857f8abf7b91b..0fa2c27449281 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -863,7 +863,6 @@ enum bpf_netdev_command {
 	XDP_QUERY_PROG,
 	XDP_QUERY_PROG_HW,
 	/* BPF program for offload callbacks, invoked at program load time. */
-	BPF_OFFLOAD_VERIFIER_PREP,
 	BPF_OFFLOAD_TRANSLATE,
 	BPF_OFFLOAD_DESTROY,
 	BPF_OFFLOAD_MAP_ALLOC,
@@ -891,11 +890,6 @@ struct netdev_bpf {
 			/* flags with which program was installed */
 			u32 prog_flags;
 		};
-		/* BPF_OFFLOAD_VERIFIER_PREP */
-		struct {
-			struct bpf_prog *prog;
-			const struct bpf_prog_offload_ops *ops; /* callee set */
-		} verifier;
 		/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
 		struct {
 			struct bpf_prog *prog;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2c88cb4ddfd85..1f7ac00a494da 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -142,21 +142,17 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
 
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
 {
-	struct netdev_bpf data = {};
-	int err;
-
-	data.verifier.prog = env->prog;
+	struct bpf_prog_offload *offload;
+	int ret = -ENODEV;
 
-	rtnl_lock();
-	err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data);
-	if (err)
-		goto exit_unlock;
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload)
+		ret = offload->offdev->ops->prepare(offload->netdev, env);
+	offload->dev_state = !ret;
+	up_read(&bpf_devs_lock);
 
-	env->prog->aux->offload->dev_ops = data.verifier.ops;
-	env->prog->aux->offload->dev_state = true;
-exit_unlock:
-	rtnl_unlock();
-	return err;
+	return ret;
 }
 
 int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,

From b07ade27e93360197e453e5ca80eebdc9099dcb5 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:29 +0000
Subject: [PATCH 15/71] bpf: pass translate() as a callback and remove its
 ndo_bpf subcommand

As part of the transition from ndo_bpf() to callbacks attached to struct
bpf_offload_dev for some of the eBPF offload operations, move the
functions related to code translation to the struct and remove the
subcommand that was used to call them through the NDO.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 +++--------
 drivers/net/netdevsim/bpf.c                      | 14 +++++++++-----
 include/linux/bpf.h                              |  1 +
 include/linux/netdevice.h                        |  3 +--
 kernel/bpf/offload.c                             | 14 +++++++-------
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 16a3a9c55852f..8653a2189c190 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -33,9 +33,6 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 	struct nfp_bpf_neutral_map *record;
 	int err;
 
-	/* Map record paths are entered via ndo, update side is protected. */
-	ASSERT_RTNL();
-
 	/* Reuse path - other offloaded program is already tracking this map. */
 	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
 					nfp_bpf_maps_neutral_params);
@@ -84,8 +81,6 @@ nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
 	bool freed = false;
 	int i;
 
-	ASSERT_RTNL();
-
 	for (i = 0; i < nfp_prog->map_records_cnt; i++) {
 		if (--nfp_prog->map_records[i]->count) {
 			nfp_prog->map_records[i] = NULL;
@@ -219,9 +214,10 @@ nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env)
 	return ret;
 }
 
-static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
+static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog)
 {
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
+	struct nfp_net *nn = netdev_priv(netdev);
 	unsigned int max_instr;
 	int err;
 
@@ -422,8 +418,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 {
 	switch (bpf->command) {
-	case BPF_OFFLOAD_TRANSLATE:
-		return nfp_bpf_translate(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_DESTROY:
 		return nfp_bpf_destroy(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_MAP_ALLOC:
@@ -604,4 +598,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
 	.insn_hook	= nfp_verify_insn,
 	.finalize	= nfp_bpf_finalize,
 	.prepare	= nfp_bpf_verifier_prep,
+	.translate	= nfp_bpf_translate,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index d045b7d666d98..30c2cd516d1c6 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -269,6 +269,14 @@ nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env)
 	return nsim_bpf_create_prog(ns, env->prog);
 }
 
+static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv;
+
+	state->state = "xlated";
+	return 0;
+}
+
 static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state;
@@ -285,6 +293,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
 	.insn_hook	= nsim_bpf_verify_insn,
 	.finalize	= nsim_bpf_finalize,
 	.prepare	= nsim_bpf_verifier_prep,
+	.translate	= nsim_bpf_translate,
 };
 
 static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
@@ -551,11 +560,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	ASSERT_RTNL();
 
 	switch (bpf->command) {
-	case BPF_OFFLOAD_TRANSLATE:
-		state = bpf->offload.prog->aux->offload->dev_priv;
-
-		state->state = "xlated";
-		return 0;
 	case BPF_OFFLOAD_DESTROY:
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f250494a4f56e..d1eb3c8a3fa9a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -269,6 +269,7 @@ struct bpf_prog_offload_ops {
 			 int insn_idx, int prev_insn_idx);
 	int (*finalize)(struct bpf_verifier_env *env);
 	int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env);
+	int (*translate)(struct net_device *netdev, struct bpf_prog *prog);
 };
 
 struct bpf_prog_offload {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0fa2c27449281..27499127e0385 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -863,7 +863,6 @@ enum bpf_netdev_command {
 	XDP_QUERY_PROG,
 	XDP_QUERY_PROG_HW,
 	/* BPF program for offload callbacks, invoked at program load time. */
-	BPF_OFFLOAD_TRANSLATE,
 	BPF_OFFLOAD_DESTROY,
 	BPF_OFFLOAD_MAP_ALLOC,
 	BPF_OFFLOAD_MAP_FREE,
@@ -890,7 +889,7 @@ struct netdev_bpf {
 			/* flags with which program was installed */
 			u32 prog_flags;
 		};
-		/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
+		/* BPF_OFFLOAD_DESTROY */
 		struct {
 			struct bpf_prog *prog;
 		} offload;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1f7ac00a494da..ae0167366c12f 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -219,14 +219,14 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)
 {
-	struct netdev_bpf data = {};
-	int ret;
-
-	data.offload.prog = prog;
+	struct bpf_prog_offload *offload;
+	int ret = -ENODEV;
 
-	rtnl_lock();
-	ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
-	rtnl_unlock();
+	down_read(&bpf_devs_lock);
+	offload = prog->aux->offload;
+	if (offload)
+		ret = offload->offdev->ops->translate(offload->netdev, prog);
+	up_read(&bpf_devs_lock);
 
 	return ret;
 }

From eb9119471efbf730c8f830f706026b486eb701dd Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:30 +0000
Subject: [PATCH 16/71] bpf: pass destroy() as a callback and remove its
 ndo_bpf subcommand

As part of the transition from ndo_bpf() to callbacks attached to struct
bpf_offload_dev for some of the eBPF offload operations, move the
functions related to program destruction to the struct and remove the
subcommand that was used to call them through the NDO.

Remove function __bpf_offload_ndo(), which is no longer used.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../net/ethernet/netronome/nfp/bpf/offload.c  |  7 ++----
 drivers/net/netdevsim/bpf.c                   |  4 +---
 include/linux/bpf.h                           |  1 +
 include/linux/netdevice.h                     |  5 ----
 kernel/bpf/offload.c                          | 24 +------------------
 5 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 8653a2189c190..91085cc3c843b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -238,15 +238,13 @@ static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog)
 	return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
 }
 
-static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
+static void nfp_bpf_destroy(struct bpf_prog *prog)
 {
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
 
 	kvfree(nfp_prog->prog);
 	nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
 	nfp_prog_free(nfp_prog);
-
-	return 0;
 }
 
 /* Atomic engine requires values to be in big endian, we need to byte swap
@@ -418,8 +416,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 {
 	switch (bpf->command) {
-	case BPF_OFFLOAD_DESTROY:
-		return nfp_bpf_destroy(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_MAP_ALLOC:
 		return nfp_bpf_map_alloc(app->priv, bpf->offmap);
 	case BPF_OFFLOAD_MAP_FREE:
@@ -599,4 +595,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
 	.finalize	= nfp_bpf_finalize,
 	.prepare	= nfp_bpf_verifier_prep,
 	.translate	= nfp_bpf_translate,
+	.destroy	= nfp_bpf_destroy,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 30c2cd516d1c6..33e3d54c3a0a8 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -294,6 +294,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
 	.finalize	= nsim_bpf_finalize,
 	.prepare	= nsim_bpf_verifier_prep,
 	.translate	= nsim_bpf_translate,
+	.destroy	= nsim_bpf_destroy_prog,
 };
 
 static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
@@ -560,9 +561,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	ASSERT_RTNL();
 
 	switch (bpf->command) {
-	case BPF_OFFLOAD_DESTROY:
-		nsim_bpf_destroy_prog(bpf->offload.prog);
-		return 0;
 	case XDP_QUERY_PROG:
 		return xdp_attachment_query(&ns->xdp, bpf);
 	case XDP_QUERY_PROG_HW:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d1eb3c8a3fa9a..867d2801db645 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -270,6 +270,7 @@ struct bpf_prog_offload_ops {
 	int (*finalize)(struct bpf_verifier_env *env);
 	int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env);
 	int (*translate)(struct net_device *netdev, struct bpf_prog *prog);
+	void (*destroy)(struct bpf_prog *prog);
 };
 
 struct bpf_prog_offload {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 27499127e0385..17d52a647fe57 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -863,7 +863,6 @@ enum bpf_netdev_command {
 	XDP_QUERY_PROG,
 	XDP_QUERY_PROG_HW,
 	/* BPF program for offload callbacks, invoked at program load time. */
-	BPF_OFFLOAD_DESTROY,
 	BPF_OFFLOAD_MAP_ALLOC,
 	BPF_OFFLOAD_MAP_FREE,
 	XDP_QUERY_XSK_UMEM,
@@ -889,10 +888,6 @@ struct netdev_bpf {
 			/* flags with which program was installed */
 			u32 prog_flags;
 		};
-		/* BPF_OFFLOAD_DESTROY */
-		struct {
-			struct bpf_prog *prog;
-		} offload;
 		/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
 		struct {
 			struct bpf_offloaded_map *offmap;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ae0167366c12f..d665e75a0ac3b 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -123,23 +123,6 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	return err;
 }
 
-static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
-			     struct netdev_bpf *data)
-{
-	struct bpf_prog_offload *offload = prog->aux->offload;
-	struct net_device *netdev;
-
-	ASSERT_RTNL();
-
-	if (!offload)
-		return -ENODEV;
-	netdev = offload->netdev;
-
-	data->command = cmd;
-
-	return netdev->netdev_ops->ndo_bpf(netdev, data);
-}
-
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
 {
 	struct bpf_prog_offload *offload;
@@ -192,12 +175,9 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload = prog->aux->offload;
-	struct netdev_bpf data = {};
-
-	data.offload.prog = prog;
 
 	if (offload->dev_state)
-		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
+		offload->offdev->ops->destroy(prog);
 
 	/* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
 	bpf_prog_free_id(prog, true);
@@ -209,12 +189,10 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 void bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
-	rtnl_lock();
 	down_write(&bpf_devs_lock);
 	if (prog->aux->offload)
 		__bpf_prog_offload_destroy(prog);
 	up_write(&bpf_devs_lock);
-	rtnl_unlock();
 }
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)

From a40a26322a83d4a26a99ad2616cbd77394c19587 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:31 +0000
Subject: [PATCH 17/71] bpf: pass prog instead of env to
 bpf_prog_offload_verifier_prep()

Function bpf_prog_offload_verifier_prep(), called from the kernel BPF
verifier to run a driver-specific callback for preparing for the
verification step for offloaded programs, takes a pointer to a struct
bpf_verifier_env object. However, no driver callback needs the whole
structure at this time: the two drivers supporting this, nfp and
netdevsim, only need a pointer to the struct bpf_prog instance held by
env.

Update the callback accordingly, on the kernel side and in these two
drivers.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 +--
 drivers/net/netdevsim/bpf.c                      | 4 ++--
 include/linux/bpf.h                              | 2 +-
 include/linux/bpf_verifier.h                     | 2 +-
 kernel/bpf/offload.c                             | 6 +++---
 kernel/bpf/verifier.c                            | 2 +-
 6 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 91085cc3c843b..e6b26d2f651db 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -183,10 +183,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
 }
 
 static int
-nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env)
+nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
-	struct bpf_prog *prog = env->prog;
 	struct nfp_app *app = nn->app;
 	struct nfp_prog *nfp_prog;
 	int ret;
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 33e3d54c3a0a8..560bdaf1c98b3 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -259,14 +259,14 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 }
 
 static int
-nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env)
+nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog)
 {
 	struct netdevsim *ns = netdev_priv(dev);
 
 	if (!ns->bpf_bind_accept)
 		return -EOPNOTSUPP;
 
-	return nsim_bpf_create_prog(ns, env->prog);
+	return nsim_bpf_create_prog(ns, prog);
 }
 
 static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 867d2801db645..888111350d0e7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,7 +268,7 @@ struct bpf_prog_offload_ops {
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
 	int (*finalize)(struct bpf_verifier_env *env);
-	int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env);
+	int (*prepare)(struct net_device *netdev, struct bpf_prog *prog);
 	int (*translate)(struct net_device *netdev, struct bpf_prog *prog);
 	void (*destroy)(struct bpf_prog *prog);
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d93e89761a8b4..11f5df1092d9b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -245,7 +245,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
 	return cur_func(env)->regs;
 }
 
-int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
+int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
 int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 				 int insn_idx, int prev_insn_idx);
 int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d665e75a0ac3b..397d206e184b2 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -123,15 +123,15 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	return err;
 }
 
-int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
+int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload;
 	int ret = -ENODEV;
 
 	down_read(&bpf_devs_lock);
-	offload = env->prog->aux->offload;
+	offload = prog->aux->offload;
 	if (offload)
-		ret = offload->offdev->ops->prepare(offload->netdev, env);
+		ret = offload->offdev->ops->prepare(offload->netdev, prog);
 	offload->dev_state = !ret;
 	up_read(&bpf_devs_lock);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 75dab40b19a3e..8d0977980cfa1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6368,7 +6368,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		goto skip_full_check;
 
 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
-		ret = bpf_prog_offload_verifier_prep(env);
+		ret = bpf_prog_offload_verifier_prep(env->prog);
 		if (ret)
 			goto skip_full_check;
 	}

From 16a8cb5cffd0a2929ae97bc258d2d9c92a4e7f6d Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 9 Nov 2018 13:03:32 +0000
Subject: [PATCH 18/71] bpf: do not pass netdev to translate() and prepare()
 offload callbacks

The kernel functions that run the verifier preparation and translation
steps for offloaded programs retrieve "offload" from "prog", and
"netdev" from "offload". Then both "prog" and "netdev" are passed to
the callbacks.

Simplify this by letting the drivers retrieve the net device themselves
from the offload object attached to prog - if they need it at all. There
is currently no need to pass the netdev as an argument to those
functions.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/offload.c | 9 ++++-----
 drivers/net/netdevsim/bpf.c                      | 7 +++----
 include/linux/bpf.h                              | 4 ++--
 kernel/bpf/offload.c                             | 4 ++--
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index e6b26d2f651db..f0283854fade4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -182,10 +182,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
 	kfree(nfp_prog);
 }
 
-static int
-nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog)
+static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
 	struct nfp_app *app = nn->app;
 	struct nfp_prog *nfp_prog;
 	int ret;
@@ -213,10 +212,10 @@ nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog)
 	return ret;
 }
 
-static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog)
+static int nfp_bpf_translate(struct bpf_prog *prog)
 {
+	struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-	struct nfp_net *nn = netdev_priv(netdev);
 	unsigned int max_instr;
 	int err;
 
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 560bdaf1c98b3..6a5b7bd9a1f96 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -258,10 +258,9 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 	return 0;
 }
 
-static int
-nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog)
+static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct netdevsim *ns = netdev_priv(dev);
+	struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev);
 
 	if (!ns->bpf_bind_accept)
 		return -EOPNOTSUPP;
@@ -269,7 +268,7 @@ nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog)
 	return nsim_bpf_create_prog(ns, prog);
 }
 
-static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog)
+static int nsim_bpf_translate(struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv;
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 888111350d0e7..987815152629a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,8 +268,8 @@ struct bpf_prog_offload_ops {
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
 	int (*finalize)(struct bpf_verifier_env *env);
-	int (*prepare)(struct net_device *netdev, struct bpf_prog *prog);
-	int (*translate)(struct net_device *netdev, struct bpf_prog *prog);
+	int (*prepare)(struct bpf_prog *prog);
+	int (*translate)(struct bpf_prog *prog);
 	void (*destroy)(struct bpf_prog *prog);
 };
 
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 397d206e184b2..52c5617e37160 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -131,7 +131,7 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
 	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;
 	if (offload)
-		ret = offload->offdev->ops->prepare(offload->netdev, prog);
+		ret = offload->offdev->ops->prepare(prog);
 	offload->dev_state = !ret;
 	up_read(&bpf_devs_lock);
 
@@ -203,7 +203,7 @@ static int bpf_prog_offload_translate(struct bpf_prog *prog)
 	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;
 	if (offload)
-		ret = offload->offdev->ops->translate(offload->netdev, prog);
+		ret = offload->offdev->ops->translate(prog);
 	up_read(&bpf_devs_lock);
 
 	return ret;

From 108d50a976db70c59e6f2fc58d3252fd38ef3fc4 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:40 -0800
Subject: [PATCH 19/71] selftests/bpf: rename flow dissector section to
 flow_dissector

Makes it compatible with the logic that derives program type
from section name in libbpf_prog_type_by_name.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_flow.c             | 2 +-
 tools/testing/selftests/bpf/test_flow_dissector.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
index 107350a7821d0..b9798f558ca7f 100644
--- a/tools/testing/selftests/bpf/bpf_flow.c
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -116,7 +116,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 	return BPF_DROP;
 }
 
-SEC("dissect")
+SEC("flow_dissector")
 int _dissect(struct __sk_buff *skb)
 {
 	if (!skb->vlan_present)
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index c0fb073b5eab2..d23d4da66b834 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -59,7 +59,7 @@ else
 fi
 
 # Attach BPF program
-./flow_dissector_load -p bpf_flow.o -s dissect
+./flow_dissector_load -p bpf_flow.o -s flow_dissector
 
 # Setup
 tc qdisc add dev lo ingress

From 0c19a9fbc9cdba29c7effb34fd5a97226bf934e6 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:41 -0800
Subject: [PATCH 20/71] libbpf: cleanup after partial failure in
 bpf_object__pin

bpftool will use bpf_object__pin in the next commits to pin all programs
and maps from the file; in case of a partial failure, we need to get
back to the clean state (undo previous program/map pins).

As part of a cleanup, I've added and exported separate routines to
pin all maps (bpf_object__pin_maps) and progs (bpf_object__pin_programs)
of an object.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 324 ++++++++++++++++++++++++++++++++++++++---
 tools/lib/bpf/libbpf.h |  18 +++
 2 files changed, 319 insertions(+), 23 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d6e62e90e8d44..341008f47c8a5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1699,6 +1699,34 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 	return 0;
 }
 
+int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
+				int instance)
+{
+	int err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warning("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (instance < 0 || instance >= prog->instances.nr) {
+		pr_warning("invalid prog instance %d of prog %s (max %d)\n",
+			   instance, prog->section_name, prog->instances.nr);
+		return -EINVAL;
+	}
+
+	err = unlink(path);
+	if (err != 0)
+		return -errno;
+	pr_debug("unpinned program '%s'\n", path);
+
+	return 0;
+}
+
 static int make_dir(const char *path)
 {
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -1737,6 +1765,64 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 	if (err)
 		return err;
 
+	for (i = 0; i < prog->instances.nr; i++) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+		if (len < 0) {
+			err = -EINVAL;
+			goto err_unpin;
+		} else if (len >= PATH_MAX) {
+			err = -ENAMETOOLONG;
+			goto err_unpin;
+		}
+
+		err = bpf_program__pin_instance(prog, buf, i);
+		if (err)
+			goto err_unpin;
+	}
+
+	return 0;
+
+err_unpin:
+	for (i = i - 1; i >= 0; i--) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+		if (len < 0)
+			continue;
+		else if (len >= PATH_MAX)
+			continue;
+
+		bpf_program__unpin_instance(prog, buf, i);
+	}
+
+	rmdir(path);
+
+	return err;
+}
+
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
+{
+	int i, err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warning("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (prog->instances.nr <= 0) {
+		pr_warning("no instances of prog %s to pin\n",
+			   prog->section_name);
+		return -EINVAL;
+	}
+
 	for (i = 0; i < prog->instances.nr; i++) {
 		char buf[PATH_MAX];
 		int len;
@@ -1747,11 +1833,15 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 		else if (len >= PATH_MAX)
 			return -ENAMETOOLONG;
 
-		err = bpf_program__pin_instance(prog, buf, i);
+		err = bpf_program__unpin_instance(prog, buf, i);
 		if (err)
 			return err;
 	}
 
+	err = rmdir(path);
+	if (err)
+		return -errno;
+
 	return 0;
 }
 
@@ -1776,12 +1866,33 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 	}
 
 	pr_debug("pinned map '%s'\n", path);
+
 	return 0;
 }
 
-int bpf_object__pin(struct bpf_object *obj, const char *path)
+int bpf_map__unpin(struct bpf_map *map, const char *path)
+{
+	int err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (map == NULL) {
+		pr_warning("invalid map pointer\n");
+		return -EINVAL;
+	}
+
+	err = unlink(path);
+	if (err != 0)
+		return -errno;
+	pr_debug("unpinned map '%s'\n", path);
+
+	return 0;
+}
+
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 {
-	struct bpf_program *prog;
 	struct bpf_map *map;
 	int err;
 
@@ -1797,6 +1908,53 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 	if (err)
 		return err;
 
+	bpf_map__for_each(map, obj) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       bpf_map__name(map));
+		if (len < 0) {
+			err = -EINVAL;
+			goto err_unpin_maps;
+		} else if (len >= PATH_MAX) {
+			err = -ENAMETOOLONG;
+			goto err_unpin_maps;
+		}
+
+		err = bpf_map__pin(map, buf);
+		if (err)
+			goto err_unpin_maps;
+	}
+
+	return 0;
+
+err_unpin_maps:
+	while ((map = bpf_map__prev(map, obj))) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       bpf_map__name(map));
+		if (len < 0)
+			continue;
+		else if (len >= PATH_MAX)
+			continue;
+
+		bpf_map__unpin(map, buf);
+	}
+
+	return err;
+}
+
+int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
+{
+	struct bpf_map *map;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
 	bpf_map__for_each(map, obj) {
 		char buf[PATH_MAX];
 		int len;
@@ -1808,11 +1966,78 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 		else if (len >= PATH_MAX)
 			return -ENAMETOOLONG;
 
-		err = bpf_map__pin(map, buf);
+		err = bpf_map__unpin(map, buf);
 		if (err)
 			return err;
 	}
 
+	return 0;
+}
+
+int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
+{
+	struct bpf_program *prog;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
+	if (!obj->loaded) {
+		pr_warning("object not yet loaded; load it first\n");
+		return -ENOENT;
+	}
+
+	err = make_dir(path);
+	if (err)
+		return err;
+
+	bpf_object__for_each_program(prog, obj) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       prog->section_name);
+		if (len < 0) {
+			err = -EINVAL;
+			goto err_unpin_programs;
+		} else if (len >= PATH_MAX) {
+			err = -ENAMETOOLONG;
+			goto err_unpin_programs;
+		}
+
+		err = bpf_program__pin(prog, buf);
+		if (err)
+			goto err_unpin_programs;
+	}
+
+	return 0;
+
+err_unpin_programs:
+	while ((prog = bpf_program__prev(prog, obj))) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       prog->section_name);
+		if (len < 0)
+			continue;
+		else if (len >= PATH_MAX)
+			continue;
+
+		bpf_program__unpin(prog, buf);
+	}
+
+	return err;
+}
+
+int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
+{
+	struct bpf_program *prog;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
 	bpf_object__for_each_program(prog, obj) {
 		char buf[PATH_MAX];
 		int len;
@@ -1824,7 +2049,7 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 		else if (len >= PATH_MAX)
 			return -ENAMETOOLONG;
 
-		err = bpf_program__pin(prog, buf);
+		err = bpf_program__unpin(prog, buf);
 		if (err)
 			return err;
 	}
@@ -1832,6 +2057,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 	return 0;
 }
 
+int bpf_object__pin(struct bpf_object *obj, const char *path)
+{
+	int err;
+
+	err = bpf_object__pin_maps(obj, path);
+	if (err)
+		return err;
+
+	err = bpf_object__pin_programs(obj, path);
+	if (err) {
+		bpf_object__unpin_maps(obj, path);
+		return err;
+	}
+
+	return 0;
+}
+
 void bpf_object__close(struct bpf_object *obj)
 {
 	size_t i;
@@ -1918,23 +2160,20 @@ void *bpf_object__priv(struct bpf_object *obj)
 }
 
 static struct bpf_program *
-__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, int i)
 {
-	size_t idx;
+	ssize_t idx;
 
 	if (!obj->programs)
 		return NULL;
-	/* First handler */
-	if (prev == NULL)
-		return &obj->programs[0];
 
-	if (prev->obj != obj) {
+	if (p->obj != obj) {
 		pr_warning("error: program handler doesn't match object\n");
 		return NULL;
 	}
 
-	idx = (prev - obj->programs) + 1;
-	if (idx >= obj->nr_programs)
+	idx = (p - obj->programs) + i;
+	if (idx >= obj->nr_programs || idx < 0)
 		return NULL;
 	return &obj->programs[idx];
 }
@@ -1944,8 +2183,29 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 {
 	struct bpf_program *prog = prev;
 
+	if (prev == NULL)
+		return obj->programs;
+
 	do {
-		prog = __bpf_program__next(prog, obj);
+		prog = __bpf_program__iter(prog, obj, 1);
+	} while (prog && bpf_program__is_function_storage(prog, obj));
+
+	return prog;
+}
+
+struct bpf_program *
+bpf_program__prev(struct bpf_program *next, struct bpf_object *obj)
+{
+	struct bpf_program *prog = next;
+
+	if (next == NULL) {
+		if (!obj->nr_programs)
+			return NULL;
+		return obj->programs + obj->nr_programs - 1;
+	}
+
+	do {
+		prog = __bpf_program__iter(prog, obj, -1);
 	} while (prog && bpf_program__is_function_storage(prog, obj));
 
 	return prog;
@@ -2272,10 +2532,10 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 	map->map_ifindex = ifindex;
 }
 
-struct bpf_map *
-bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+static struct bpf_map *
+__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
 {
-	size_t idx;
+	ssize_t idx;
 	struct bpf_map *s, *e;
 
 	if (!obj || !obj->maps)
@@ -2284,21 +2544,39 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 	s = obj->maps;
 	e = obj->maps + obj->nr_maps;
 
-	if (prev == NULL)
-		return s;
-
-	if ((prev < s) || (prev >= e)) {
+	if ((m < s) || (m >= e)) {
 		pr_warning("error in %s: map handler doesn't belong to object\n",
 			   __func__);
 		return NULL;
 	}
 
-	idx = (prev - obj->maps) + 1;
-	if (idx >= obj->nr_maps)
+	idx = (m - obj->maps) + i;
+	if (idx >= obj->nr_maps || idx < 0)
 		return NULL;
 	return &obj->maps[idx];
 }
 
+struct bpf_map *
+bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+{
+	if (prev == NULL)
+		return obj->maps;
+
+	return __bpf_map__iter(prev, obj, 1);
+}
+
+struct bpf_map *
+bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
+{
+	if (next == NULL) {
+		if (!obj->nr_maps)
+			return NULL;
+		return obj->maps + obj->nr_maps - 1;
+	}
+
+	return __bpf_map__iter(next, obj, -1);
+}
+
 struct bpf_map *
 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
 {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1f3468dad8b2c..b1686a7871022 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -71,6 +71,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
 LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 						      size_t obj_buf_sz,
 						      const char *name);
+LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
+LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
+				      const char *path);
+LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
+					const char *path);
+LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
+					  const char *path);
 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
 LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
@@ -112,6 +119,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
 	     (pos) != NULL;				\
 	     (pos) = bpf_program__next((pos), (obj)))
 
+LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
+						 struct bpf_object *obj);
+
 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
 					 void *);
 
@@ -131,7 +141,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
 LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
 					 const char *path,
 					 int instance);
+LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
+					   const char *path,
+					   int instance);
 LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
 LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
 struct bpf_insn;
@@ -260,6 +274,9 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 	     (pos) != NULL;				\
 	     (pos) = bpf_map__next((pos), (obj)))
 
+LIBBPF_API struct bpf_map *
+bpf_map__prev(struct bpf_map *map, struct bpf_object *obj);
+
 LIBBPF_API int bpf_map__fd(struct bpf_map *map);
 LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
 LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
@@ -274,6 +291,7 @@ LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
 
 LIBBPF_API long libbpf_get_error(const void *ptr);
 

From fd734c5cca62b7630703244d3613be135d646a0e Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:42 -0800
Subject: [PATCH 21/71] libbpf: bpf_program__pin: add special case for
 instances.nr == 1

When bpf_program has only one instance, don't create a subdirectory with
per-instance pin files (<prog>/0). Instead, just create a single pin file
for that single instance. This simplifies object pinning by not creating
unnecessary subdirectories.

This can potentially break existing users that depend on the case
where '/0' is always created. However, I couldn't find any serious
usage of bpf_program__pin inside the kernel tree and I suppose there
should be none outside.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 341008f47c8a5..97ce9f2140021 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1761,6 +1761,11 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 		return -EINVAL;
 	}
 
+	if (prog->instances.nr == 1) {
+		/* don't create subdirs when pinning single instance */
+		return bpf_program__pin_instance(prog, path, 0);
+	}
+
 	err = make_dir(path);
 	if (err)
 		return err;
@@ -1823,6 +1828,11 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 		return -EINVAL;
 	}
 
+	if (prog->instances.nr == 1) {
+		/* don't create subdirs when pinning single instance */
+		return bpf_program__unpin_instance(prog, path, 0);
+	}
+
 	for (i = 0; i < prog->instances.nr; i++) {
 		char buf[PATH_MAX];
 		int len;

From 33a2c75c55e24aa30ff9fed805ae8bea13c1e2a3 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:43 -0800
Subject: [PATCH 22/71] libbpf: add internal pin_name

pin_name is the same as section_name where '/' is replaced
by '_'. bpf_object__pin_programs is converted to use pin_name
to avoid the situation where section_name would require creating another
subdirectory for a pin (as, for example, when calling bpf_object__pin_programs
for programs in sections like "cgroup/connect6").

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 97ce9f2140021..e827542ffa3af 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -124,6 +124,10 @@ struct bpf_program {
 	char *name;
 	int prog_ifindex;
 	char *section_name;
+	/* section_name with / replaced by _; makes recursive pinning
+	 * in bpf_object__pin_programs easier
+	 */
+	char *pin_name;
 	struct bpf_insn *insns;
 	size_t insns_cnt, main_prog_cnt;
 	enum bpf_prog_type type;
@@ -253,6 +257,7 @@ static void bpf_program__exit(struct bpf_program *prog)
 	bpf_program__unload(prog);
 	zfree(&prog->name);
 	zfree(&prog->section_name);
+	zfree(&prog->pin_name);
 	zfree(&prog->insns);
 	zfree(&prog->reloc_desc);
 
@@ -261,6 +266,17 @@ static void bpf_program__exit(struct bpf_program *prog)
 	prog->idx = -1;
 }
 
+static char *__bpf_program__pin_name(struct bpf_program *prog)
+{
+	char *name, *p;
+
+	name = p = strdup(prog->section_name);
+	while ((p = strchr(p, '/')))
+		*p = '_';
+
+	return name;
+}
+
 static int
 bpf_program__init(void *data, size_t size, char *section_name, int idx,
 		  struct bpf_program *prog)
@@ -279,6 +295,13 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 		goto errout;
 	}
 
+	prog->pin_name = __bpf_program__pin_name(prog);
+	if (!prog->pin_name) {
+		pr_warning("failed to alloc pin name for prog under section(%d) %s\n",
+			   idx, section_name);
+		goto errout;
+	}
+
 	prog->insns = malloc(size);
 	if (!prog->insns) {
 		pr_warning("failed to alloc insns for prog under section %s\n",
@@ -2006,7 +2029,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 		int len;
 
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->section_name);
+			       prog->pin_name);
 		if (len < 0) {
 			err = -EINVAL;
 			goto err_unpin_programs;
@@ -2028,7 +2051,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 		int len;
 
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->section_name);
+			       prog->pin_name);
 		if (len < 0)
 			continue;
 		else if (len >= PATH_MAX)
@@ -2053,7 +2076,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 		int len;
 
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->section_name);
+			       prog->pin_name);
 		if (len < 0)
 			return -EINVAL;
 		else if (len >= PATH_MAX)

From 77380998d91dee8aafdbe42634776ba1ef692f1e Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:44 -0800
Subject: [PATCH 23/71] bpftool: add loadall command

This patch adds a new *loadall* command which differs slightly from the
existing *load*. The *load* command loads all programs from the obj file,
but pins only the first program. *loadall* pins all programs from the
obj file under the specified directory.

The intended use case is flow_dissector, where we want to load a bunch
of progs, pin them all and after that construct a jump table.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpftool/Documentation/bpftool-prog.rst    | 14 ++--
 tools/bpf/bpftool/bash-completion/bpftool     |  4 +-
 tools/bpf/bpftool/common.c                    | 31 ++++----
 tools/bpf/bpftool/main.h                      |  1 +
 tools/bpf/bpftool/prog.c                      | 74 ++++++++++++++-----
 5 files changed, 81 insertions(+), 43 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index ac4e904b10fbd..984d125c507af 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,7 +15,8 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
+	{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
+	| **loadall** | **help** }
 
 MAP COMMANDS
 =============
@@ -24,7 +25,7 @@ MAP COMMANDS
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
-|	**bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+|	**bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
 |       **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
 |       **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
 |	**bpftool** **prog help**
@@ -79,8 +80,11 @@ DESCRIPTION
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
-	**bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
-		  Load bpf program from binary *OBJ* and pin as *FILE*.
+	**bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+		  Load bpf program(s) from binary *OBJ* and pin as *PATH*.
+		  **bpftool prog load** pins only the first program from the
+		  *OBJ* as *PATH*. **bpftool prog loadall** pins all programs
+		  from the *OBJ* under *PATH* directory.
 		  **type** is optional, if not specified program type will be
 		  inferred from section names.
 		  By default bpftool will create new maps as declared in the ELF
@@ -93,7 +97,7 @@ DESCRIPTION
 		  If **dev** *NAME* is specified program will be loaded onto
 		  given networking device (offload).
 
-		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  Note: *PATH* must be located in *bpffs* mount. It must not
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 3f78e6404589f..780ebafb756a5 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -243,7 +243,7 @@ _bpftool()
     # Completion depends on object and command in use
     case $object in
         prog)
-            if [[ $command != "load" ]]; then
+            if [[ $command != "load" && $command != "loadall" ]]; then
                 case $prev in
                     id)
                         _bpftool_get_prog_ids
@@ -309,7 +309,7 @@ _bpftool()
                     fi
                     return 0
                     ;;
-                load)
+                load|loadall)
                     local obj
 
                     if [[ ${#words[@]} -lt 6 ]]; then
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 1149565be4b16..cb06a5b6e0162 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -177,34 +177,23 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
 	return fd;
 }
 
-int do_pin_fd(int fd, const char *name)
+int mount_bpffs_for_pin(const char *name)
 {
 	char err_str[ERR_MAX_LEN];
 	char *file;
 	char *dir;
 	int err = 0;
 
-	err = bpf_obj_pin(fd, name);
-	if (!err)
-		goto out;
-
 	file = malloc(strlen(name) + 1);
 	strcpy(file, name);
 	dir = dirname(file);
 
-	if (errno != EPERM || is_bpffs(dir)) {
-		p_err("can't pin the object (%s): %s", name, strerror(errno));
+	if (is_bpffs(dir))
+		/* nothing to do if already mounted */
 		goto out_free;
-	}
 
-	/* Attempt to mount bpffs, then retry pinning. */
 	err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
-	if (!err) {
-		err = bpf_obj_pin(fd, name);
-		if (err)
-			p_err("can't pin the object (%s): %s", name,
-			      strerror(errno));
-	} else {
+	if (err) {
 		err_str[ERR_MAX_LEN - 1] = '\0';
 		p_err("can't mount BPF file system to pin the object (%s): %s",
 		      name, err_str);
@@ -212,10 +201,20 @@ int do_pin_fd(int fd, const char *name)
 
 out_free:
 	free(file);
-out:
 	return err;
 }
 
+int do_pin_fd(int fd, const char *name)
+{
+	int err;
+
+	err = mount_bpffs_for_pin(name);
+	if (err)
+		return err;
+
+	return bpf_obj_pin(fd, name);
+}
+
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
 {
 	unsigned int id;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 14857c273bf67..61d82020af58e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -131,6 +131,7 @@ const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
 int open_obj_pinned(char *path);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int mount_bpffs_for_pin(const char *name);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
 int do_pin_fd(int fd, const char *name);
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b9b84553bec44..97d930042cd54 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -792,15 +792,16 @@ static int do_detach(int argc, char **argv)
 		jsonw_null(json_wtr);
 	return 0;
 }
-static int do_load(int argc, char **argv)
+
+static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
 	enum bpf_attach_type expected_attach_type;
 	struct bpf_object_open_attr attr = {
 		.prog_type	= BPF_PROG_TYPE_UNSPEC,
 	};
 	struct map_replace *map_replace = NULL;
+	struct bpf_program *prog = NULL, *pos;
 	unsigned int old_map_fds = 0;
-	struct bpf_program *prog;
 	struct bpf_object *obj;
 	struct bpf_map *map;
 	const char *pinfile;
@@ -918,26 +919,25 @@ static int do_load(int argc, char **argv)
 		goto err_free_reuse_maps;
 	}
 
-	prog = bpf_program__next(NULL, obj);
-	if (!prog) {
-		p_err("object file doesn't contain any bpf program");
-		goto err_close_obj;
-	}
+	bpf_object__for_each_program(pos, obj) {
+		enum bpf_prog_type prog_type = attr.prog_type;
 
-	bpf_program__set_ifindex(prog, ifindex);
-	if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
-		const char *sec_name = bpf_program__title(prog, false);
+		if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+			const char *sec_name = bpf_program__title(pos, false);
 
-		err = libbpf_prog_type_by_name(sec_name, &attr.prog_type,
-					       &expected_attach_type);
-		if (err < 0) {
-			p_err("failed to guess program type based on section name %s\n",
-			      sec_name);
-			goto err_close_obj;
+			err = libbpf_prog_type_by_name(sec_name, &prog_type,
+						       &expected_attach_type);
+			if (err < 0) {
+				p_err("failed to guess program type based on section name %s\n",
+				      sec_name);
+				goto err_close_obj;
+			}
 		}
+
+		bpf_program__set_ifindex(pos, ifindex);
+		bpf_program__set_type(pos, prog_type);
+		bpf_program__set_expected_attach_type(pos, expected_attach_type);
 	}
-	bpf_program__set_type(prog, attr.prog_type);
-	bpf_program__set_expected_attach_type(prog, expected_attach_type);
 
 	qsort(map_replace, old_map_fds, sizeof(*map_replace),
 	      map_replace_compar);
@@ -1003,9 +1003,31 @@ static int do_load(int argc, char **argv)
 		goto err_close_obj;
 	}
 
-	if (do_pin_fd(bpf_program__fd(prog), pinfile))
+	err = mount_bpffs_for_pin(pinfile);
+	if (err)
 		goto err_close_obj;
 
+	if (first_prog_only) {
+		prog = bpf_program__next(NULL, obj);
+		if (!prog) {
+			p_err("object file doesn't contain any bpf program");
+			goto err_close_obj;
+		}
+
+		err = bpf_obj_pin(bpf_program__fd(prog), pinfile);
+		if (err) {
+			p_err("failed to pin program %s",
+			      bpf_program__title(prog, false));
+			goto err_close_obj;
+		}
+	} else {
+		err = bpf_object__pin_programs(obj, pinfile);
+		if (err) {
+			p_err("failed to pin all programs");
+			goto err_close_obj;
+		}
+	}
+
 	if (json_output)
 		jsonw_null(json_wtr);
 
@@ -1025,6 +1047,16 @@ static int do_load(int argc, char **argv)
 	return -1;
 }
 
+static int do_load(int argc, char **argv)
+{
+	return load_with_options(argc, argv, true);
+}
+
+static int do_loadall(int argc, char **argv)
+{
+	return load_with_options(argc, argv, false);
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -1037,7 +1069,8 @@ static int do_help(int argc, char **argv)
 		"       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
 		"       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
 		"       %s %s pin   PROG FILE\n"
-		"       %s %s load  OBJ  FILE [type TYPE] [dev NAME] \\\n"
+		"       %s %s { load | loadall } OBJ  PATH \\\n"
+		"                         [type TYPE] [dev NAME] \\\n"
 		"                         [map { idx IDX | name NAME } MAP]\n"
 		"       %s %s attach PROG ATTACH_TYPE MAP\n"
 		"       %s %s detach PROG ATTACH_TYPE MAP\n"
@@ -1069,6 +1102,7 @@ static const struct cmd cmds[] = {
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
 	{ "load",	do_load },
+	{ "loadall",	do_loadall },
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
 	{ 0 }

From 3767a94b3253fc8c3df96913d7dec796619161c7 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:45 -0800
Subject: [PATCH 24/71] bpftool: add pinmaps argument to the load/loadall

This additional argument lets users pin all maps from the object under
the specified path.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpftool/Documentation/bpftool-prog.rst    |  4 +++-
 tools/bpf/bpftool/bash-completion/bpftool     |  3 ++-
 tools/bpf/bpftool/prog.c                      | 24 ++++++++++++++++++-
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 984d125c507af..15e9172f7e55d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -80,7 +80,7 @@ DESCRIPTION
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
-	**bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+	**bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
 		  Load bpf program(s) from binary *OBJ* and pin as *PATH*.
 		  **bpftool prog load** pins only the first program from the
 		  *OBJ* as *PATH*. **bpftool prog loadall** pins all programs
@@ -96,6 +96,8 @@ DESCRIPTION
 		  use, referring to it by **id** or through a **pinned** file.
 		  If **dev** *NAME* is specified program will be loaded onto
 		  given networking device (offload).
+		  Optional **pinmaps** argument can be provided to pin all
+		  maps under *MAP_DIR* directory.
 
 		  Note: *PATH* must be located in *bpffs* mount. It must not
 		  contain a dot character ('.'), which is reserved for future
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 780ebafb756a5..a05d0071f39f1 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -346,7 +346,7 @@ _bpftool()
                             _bpftool_get_map_ids
                             return 0
                             ;;
-                        pinned)
+                        pinned|pinmaps)
                             _filedir
                             return 0
                             ;;
@@ -358,6 +358,7 @@ _bpftool()
                             COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
                             _bpftool_once_attr 'type'
                             _bpftool_once_attr 'dev'
+                            _bpftool_once_attr 'pinmaps'
                             return 0
                             ;;
                     esac
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 97d930042cd54..c2ce4220bbca0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -802,6 +802,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 	struct map_replace *map_replace = NULL;
 	struct bpf_program *prog = NULL, *pos;
 	unsigned int old_map_fds = 0;
+	const char *pinmaps = NULL;
 	struct bpf_object *obj;
 	struct bpf_map *map;
 	const char *pinfile;
@@ -906,6 +907,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 				goto err_free_reuse_maps;
 			}
 			NEXT_ARG();
+		} else if (is_prefix(*argv, "pinmaps")) {
+			NEXT_ARG();
+
+			if (!REQ_ARGS(1))
+				goto err_free_reuse_maps;
+
+			pinmaps = GET_ARG();
 		} else {
 			p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
 			      *argv);
@@ -1028,6 +1036,14 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 		}
 	}
 
+	if (pinmaps) {
+		err = bpf_object__pin_maps(obj, pinmaps);
+		if (err) {
+			p_err("failed to pin all maps");
+			goto err_unpin;
+		}
+	}
+
 	if (json_output)
 		jsonw_null(json_wtr);
 
@@ -1038,6 +1054,11 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 	return 0;
 
+err_unpin:
+	if (first_prog_only)
+		unlink(pinfile);
+	else
+		bpf_object__unpin_programs(obj, pinfile);
 err_close_obj:
 	bpf_object__close(obj);
 err_free_reuse_maps:
@@ -1071,7 +1092,8 @@ static int do_help(int argc, char **argv)
 		"       %s %s pin   PROG FILE\n"
 		"       %s %s { load | loadall } OBJ  PATH \\\n"
 		"                         [type TYPE] [dev NAME] \\\n"
-		"                         [map { idx IDX | name NAME } MAP]\n"
+		"                         [map { idx IDX | name NAME } MAP]\\\n"
+		"                         [pinmaps MAP_DIR]\n"
 		"       %s %s attach PROG ATTACH_TYPE MAP\n"
 		"       %s %s detach PROG ATTACH_TYPE MAP\n"
 		"       %s %s help\n"

From 092f08927300086b6520dfa3aa4d9450266f27ae Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 9 Nov 2018 08:21:46 -0800
Subject: [PATCH 25/71] bpftool: support loading flow dissector

This commit adds support for loading/attaching/detaching flow
dissector programs.

When `bpftool prog loadall` is called with a flow_dissector prog (i.e. when
the 'type flow_dissector' argument is passed), we load and pin all programs.
The user is responsible for constructing the jump table for the tail calls.

The last argument of `bpftool attach` is made optional for this use
case.

Example:
bpftool prog loadall tools/testing/selftests/bpf/bpf_flow.o \
        /sys/fs/bpf/flow type flow_dissector \
	pinmaps /sys/fs/bpf/flow

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 0 0 0 0 \
        value pinned /sys/fs/bpf/flow/IP

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 1 0 0 0 \
        value pinned /sys/fs/bpf/flow/IPV6

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 2 0 0 0 \
        value pinned /sys/fs/bpf/flow/IPV6OP

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 3 0 0 0 \
        value pinned /sys/fs/bpf/flow/IPV6FR

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 4 0 0 0 \
        value pinned /sys/fs/bpf/flow/MPLS

bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
        key 5 0 0 0 \
        value pinned /sys/fs/bpf/flow/VLAN

bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector flow_dissector

Tested by using the above lines to load the prog in
the test_flow_dissector.sh selftest.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpftool/Documentation/bpftool-prog.rst    | 26 +++---
 tools/bpf/bpftool/bash-completion/bpftool     | 14 ++-
 tools/bpf/bpftool/prog.c                      | 85 ++++++++++---------
 3 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 15e9172f7e55d..8db78ed82a71f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -26,8 +26,8 @@ MAP COMMANDS
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
 |	**bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
-|       **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
-|       **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
+|       **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
+|       **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -40,7 +40,9 @@ MAP COMMANDS
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
 |	}
-|       *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
+|       *ATTACH_TYPE* := {
+|		**msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector**
+|	}
 
 
 DESCRIPTION
@@ -103,13 +105,17 @@ DESCRIPTION
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
-        **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
-                  Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
-                  to the map *MAP*.
-
-        **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
-                  Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
-                  from the map *MAP*.
+	**bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
+		  Attach bpf program *PROG* (with type specified by
+		  *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
+		  parameter, with the exception of *flow_dissector* which is
+		  attached to current networking name space.
+
+	**bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
+		  Detach bpf program *PROG* (with type specified by
+		  *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
+		  parameter, with the exception of *flow_dissector* which is
+		  detached from the current networking name space.
 
 	**bpftool prog help**
 		  Print short help message.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a05d0071f39f1..45c2db257d2bf 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -299,7 +299,8 @@ _bpftool()
                     fi
 
                     if [[ ${#words[@]} == 6 ]]; then
-                        COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W "msg_verdict skb_verdict \
+                            skb_parse flow_dissector" -- "$cur" ) )
                         return 0
                     fi
 
@@ -338,7 +339,16 @@ _bpftool()
 
                     case $prev in
                         type)
-                            COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \
+                            COMPREPLY=( $( compgen -W "socket kprobe \
+                                kretprobe classifier flow_dissector \
+                                action tracepoint raw_tracepoint \
+                                xdp perf_event cgroup/skb cgroup/sock \
+                                cgroup/dev lwt_in lwt_out lwt_xmit \
+                                lwt_seg6local sockops sk_skb sk_msg \
+                                lirc_mode2 cgroup/bind4 cgroup/bind6 \
+                                cgroup/connect4 cgroup/connect6 \
+                                cgroup/sendmsg4 cgroup/sendmsg6 \
+                                cgroup/post_bind4 cgroup/post_bind6" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c2ce4220bbca0..5ff5544596e71 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -81,6 +81,7 @@ static const char * const attach_type_strings[] = {
 	[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
 	[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
 	[BPF_SK_MSG_VERDICT] = "msg_verdict",
+	[BPF_FLOW_DISSECTOR] = "flow_dissector",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
@@ -721,30 +722,49 @@ int map_replace_compar(const void *p1, const void *p2)
 	return a->idx - b->idx;
 }
 
-static int do_attach(int argc, char **argv)
+static int parse_attach_detach_args(int argc, char **argv, int *progfd,
+				    enum bpf_attach_type *attach_type,
+				    int *mapfd)
 {
-	enum bpf_attach_type attach_type;
-	int err, mapfd, progfd;
-
-	if (!REQ_ARGS(5)) {
-		p_err("too few parameters for map attach");
+	if (!REQ_ARGS(3))
 		return -EINVAL;
-	}
 
-	progfd = prog_parse_fd(&argc, &argv);
-	if (progfd < 0)
-		return progfd;
+	*progfd = prog_parse_fd(&argc, &argv);
+	if (*progfd < 0)
+		return *progfd;
 
-	attach_type = parse_attach_type(*argv);
-	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
-		p_err("invalid attach type");
+	*attach_type = parse_attach_type(*argv);
+	if (*attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach/detach type");
 		return -EINVAL;
 	}
+
+	if (*attach_type == BPF_FLOW_DISSECTOR) {
+		*mapfd = -1;
+		return 0;
+	}
+
 	NEXT_ARG();
+	if (!REQ_ARGS(2))
+		return -EINVAL;
+
+	*mapfd = map_parse_fd(&argc, &argv);
+	if (*mapfd < 0)
+		return *mapfd;
+
+	return 0;
+}
 
-	mapfd = map_parse_fd(&argc, &argv);
-	if (mapfd < 0)
-		return mapfd;
+static int do_attach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, progfd;
+	int mapfd;
+
+	err = parse_attach_detach_args(argc, argv,
+				       &progfd, &attach_type, &mapfd);
+	if (err)
+		return err;
 
 	err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
 	if (err) {
@@ -760,27 +780,13 @@ static int do_attach(int argc, char **argv)
 static int do_detach(int argc, char **argv)
 {
 	enum bpf_attach_type attach_type;
-	int err, mapfd, progfd;
-
-	if (!REQ_ARGS(5)) {
-		p_err("too few parameters for map detach");
-		return -EINVAL;
-	}
-
-	progfd = prog_parse_fd(&argc, &argv);
-	if (progfd < 0)
-		return progfd;
+	int err, progfd;
+	int mapfd;
 
-	attach_type = parse_attach_type(*argv);
-	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
-		p_err("invalid attach type");
-		return -EINVAL;
-	}
-	NEXT_ARG();
-
-	mapfd = map_parse_fd(&argc, &argv);
-	if (mapfd < 0)
-		return mapfd;
+	err = parse_attach_detach_args(argc, argv,
+				       &progfd, &attach_type, &mapfd);
+	if (err)
+		return err;
 
 	err = bpf_prog_detach2(progfd, mapfd, attach_type);
 	if (err) {
@@ -1094,8 +1100,8 @@ static int do_help(int argc, char **argv)
 		"                         [type TYPE] [dev NAME] \\\n"
 		"                         [map { idx IDX | name NAME } MAP]\\\n"
 		"                         [pinmaps MAP_DIR]\n"
-		"       %s %s attach PROG ATTACH_TYPE MAP\n"
-		"       %s %s detach PROG ATTACH_TYPE MAP\n"
+		"       %s %s attach PROG ATTACH_TYPE [MAP]\n"
+		"       %s %s detach PROG ATTACH_TYPE [MAP]\n"
 		"       %s %s help\n"
 		"\n"
 		"       " HELP_SPEC_MAP "\n"
@@ -1107,7 +1113,8 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
-		"       ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n"
+		"       ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse |\n"
+		"                        flow_dissector }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],

From 46f53a65d2de3e1591636c22b626b09d8684fd71 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 10 Nov 2018 22:15:13 -0800
Subject: [PATCH 26/71] bpf: Allow narrow loads with offset > 0

Currently BPF verifier allows narrow loads for a context field only with
offset zero. E.g. if there is a __u32 field then only the following
loads are permitted:
  * off=0, size=1 (narrow);
  * off=0, size=2 (narrow);
  * off=0, size=4 (full).

On the other hand, LLVM can generate a load with a non-zero offset that
makes sense from the program logic point of view, but the verifier
doesn't accept it.

E.g. tools/testing/selftests/bpf/sendmsg4_prog.c has code:

  #define DST_IP4			0xC0A801FEU /* 192.168.1.254 */
  ...
  	if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&

where ctx is struct bpf_sock_addr.

Some versions of LLVM can produce the following byte code for it:

       8:       71 12 07 00 00 00 00 00         r2 = *(u8 *)(r1 + 7)
       9:       67 02 00 00 18 00 00 00         r2 <<= 24
      10:       18 03 00 00 00 00 00 fe 00 00 00 00 00 00 00 00         r3 = 4261412864 ll
      12:       5d 32 07 00 00 00 00 00         if r2 != r3 goto +7 <LBB0_6>

where `*(u8 *)(r1 + 7)` means narrow load for ctx->user_ip4 with size=1
and offset=3 (7 - sizeof(ctx->user_family) = 3). This load is currently
rejected by verifier.

The verifier code that rejects such loads is in bpf_ctx_narrow_access_ok(),
which means that any is_valid_access implementation that uses the function
works this way, e.g. bpf_skb_is_valid_access() for __sk_buff or
sock_addr_is_valid_access() for bpf_sock_addr.

This patch adds support for such loads. The offset can be in
[0; size_default) but has to be a multiple of the load size. E.g. for a
__u32 field the following loads are now supported:
  * off=0, size=1 (narrow);
  * off=1, size=1 (narrow);
  * off=2, size=1 (narrow);
  * off=3, size=1 (narrow);
  * off=0, size=2 (narrow);
  * off=2, size=2 (narrow);
  * off=0, size=4 (full).

Reported-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 16 +---------------
 kernel/bpf/verifier.c  | 21 ++++++++++++++++-----
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index de629b706d1d7..cc17f5f32fbb6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -668,24 +668,10 @@ static inline u32 bpf_ctx_off_adjust_machine(u32 size)
 	return size;
 }
 
-static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access,
-					   u32 size_default)
-{
-	size_default = bpf_ctx_off_adjust_machine(size_default);
-	size_access  = bpf_ctx_off_adjust_machine(size_access);
-
-#ifdef __LITTLE_ENDIAN
-	return (off & (size_default - 1)) == 0;
-#else
-	return (off & (size_default - 1)) + size_access == size_default;
-#endif
-}
-
 static inline bool
 bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 {
-	return bpf_ctx_narrow_align_ok(off, size, size_default) &&
-	       size <= size_default && (size & (size - 1)) == 0;
+	return size <= size_default && (size & (size - 1)) == 0;
 }
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8d0977980cfa1..b5222aa61d543 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5718,10 +5718,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	int i, cnt, size, ctx_field_size, delta = 0;
 	const int insn_cnt = env->prog->len;
 	struct bpf_insn insn_buf[16], *insn;
+	u32 target_size, size_default, off;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
 	bool is_narrower_load;
-	u32 target_size;
 
 	if (ops->gen_prologue || env->seen_direct_write) {
 		if (!ops->gen_prologue) {
@@ -5814,9 +5814,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		 * we will apply proper mask to the result.
 		 */
 		is_narrower_load = size < ctx_field_size;
+		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
+		off = insn->off;
 		if (is_narrower_load) {
-			u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
-			u32 off = insn->off;
 			u8 size_code;
 
 			if (type == BPF_WRITE) {
@@ -5844,12 +5844,23 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		if (is_narrower_load && size < target_size) {
-			if (ctx_field_size <= 4)
+			u8 shift = (off & (size_default - 1)) * 8;
+
+			if (ctx_field_size <= 4) {
+				if (shift)
+					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
+									insn->dst_reg,
+									shift);
 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 								(1 << size * 8) - 1);
-			else
+			} else {
+				if (shift)
+					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
+									insn->dst_reg,
+									shift);
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
 								(1 << size * 8) - 1);
+			}
 		}
 
 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);

From 6c2afb674dbda9b736b8f09c976516e1e788860a Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 10 Nov 2018 22:15:14 -0800
Subject: [PATCH 27/71] selftests/bpf: Test narrow loads with off > 0 in
 test_verifier

Test the following narrow loads in test_verifier for context __sk_buff:
* off=1, size=1 - ok;
* off=2, size=1 - ok;
* off=3, size=1 - ok;
* off=0, size=2 - ok;
* off=1, size=2 - fail;
* off=2, size=2 - ok;
* off=3, size=2 - fail.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 48 ++++++++++++++++-----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6f61df62f690c..54d16fbdef8b9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2026,29 +2026,27 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
-		"check skb->hash byte load not permitted 1",
+		"check skb->hash byte load permitted 1",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash) + 1),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
+		.result = ACCEPT,
 	},
 	{
-		"check skb->hash byte load not permitted 2",
+		"check skb->hash byte load permitted 2",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash) + 2),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
+		.result = ACCEPT,
 	},
 	{
-		"check skb->hash byte load not permitted 3",
+		"check skb->hash byte load permitted 3",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -2060,8 +2058,7 @@ static struct bpf_test tests[] = {
 #endif
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid bpf_context access",
-		.result = REJECT,
+		.result = ACCEPT,
 	},
 	{
 		"check cb access: byte, wrong type",
@@ -2173,7 +2170,7 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
-		"check skb->hash half load not permitted",
+		"check skb->hash half load permitted 2",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -2182,6 +2179,37 @@ static struct bpf_test tests[] = {
 #else
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash)),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash half load not permitted, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 1),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check skb->hash half load not permitted, unaligned 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 1),
 #endif
 			BPF_EXIT_INSN(),
 		},

From e7605475f5f1af58668701b5ffe7763bdeb28527 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 10 Nov 2018 22:15:15 -0800
Subject: [PATCH 28/71] selftests/bpf: Test narrow loads with off > 0 for
 bpf_sock_addr

Add more test cases for context bpf_sock_addr to test narrow loads with
offset > 0 for ctx->user_ip4 field (__u32):
* off=1, size=1;
* off=2, size=1;
* off=3, size=1;
* off=2, size=2.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sock_addr.c | 28 +++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index aeeb76a54d633..73b7493d41209 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -574,24 +574,44 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		/* if (sk.family == AF_INET && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24),
 
 		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, type)),
 		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
 		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20),
 
 		/*     1st_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18),
+
+		/*     2nd_byte_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16),
+
+		/*     3rd_byte_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14),
+
+		/*     4th_byte_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12),
 
 		/*     1st_half_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10),
+
+		/*     2nd_half_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8),
 
 		/*     whole_user_ip4 == expected) { */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,

From 5c86d2125b58949122e03f04ce940e6f5b8534ba Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Fri, 9 Nov 2018 10:18:16 -0800
Subject: [PATCH 29/71] selftests/bpf: Fix uninitialized duration warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Daniel Borkmann reports:

test_progs.c: In function ‘main’:
test_progs.c:81:3: warning: ‘duration’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
   ^~~~~~
test_progs.c:1706:8: note: ‘duration’ was declared here
  __u32 duration;
        ^~~~~~~~

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_progs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2d3c04f455302..c1e688f61061c 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1703,7 +1703,7 @@ static void test_reference_tracking()
 	const char *file = "./test_sk_lookup_kern.o";
 	struct bpf_object *obj;
 	struct bpf_program *prog;
-	__u32 duration;
+	__u32 duration = 0;
 	int err = 0;
 
 	obj = bpf_object__open(file);

From a83d6e76a67424ebbbbed643f51e97934ffc2bc2 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 12 Nov 2018 15:44:53 -0800
Subject: [PATCH 30/71] bpf: libbpf: Fix bpf_program__next() API

This patch restores the behavior in
commit eac7d84519a3 ("tools: libbpf: don't return '.text' as a program for multi-function programs")
such that bpf_program__next() does not return pseudo programs in ".text".

Fixes: 0c19a9fbc9cd ("libbpf: cleanup after partial failure in bpf_object__pin")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e827542ffa3af..a01eb9584e522 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2193,19 +2193,25 @@ void *bpf_object__priv(struct bpf_object *obj)
 }
 
 static struct bpf_program *
-__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, int i)
+__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
 {
+	size_t nr_programs = obj->nr_programs;
 	ssize_t idx;
 
-	if (!obj->programs)
+	if (!nr_programs)
 		return NULL;
 
+	if (!p)
+		/* Iter from the beginning */
+		return forward ? &obj->programs[0] :
+			&obj->programs[nr_programs - 1];
+
 	if (p->obj != obj) {
 		pr_warning("error: program handler doesn't match object\n");
 		return NULL;
 	}
 
-	idx = (p - obj->programs) + i;
+	idx = (p - obj->programs) + (forward ? 1 : -1);
 	if (idx >= obj->nr_programs || idx < 0)
 		return NULL;
 	return &obj->programs[idx];
@@ -2216,11 +2222,8 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 {
 	struct bpf_program *prog = prev;
 
-	if (prev == NULL)
-		return obj->programs;
-
 	do {
-		prog = __bpf_program__iter(prog, obj, 1);
+		prog = __bpf_program__iter(prog, obj, true);
 	} while (prog && bpf_program__is_function_storage(prog, obj));
 
 	return prog;
@@ -2231,14 +2234,8 @@ bpf_program__prev(struct bpf_program *next, struct bpf_object *obj)
 {
 	struct bpf_program *prog = next;
 
-	if (next == NULL) {
-		if (!obj->nr_programs)
-			return NULL;
-		return obj->programs + obj->nr_programs - 1;
-	}
-
 	do {
-		prog = __bpf_program__iter(prog, obj, -1);
+		prog = __bpf_program__iter(prog, obj, false);
 	} while (prog && bpf_program__is_function_storage(prog, obj));
 
 	return prog;

From ac8acec9912a93be9953446766e0bb73aeeecc64 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Mon, 12 Nov 2018 15:10:42 -0700
Subject: [PATCH 31/71] bpf: Remove unused variable in nsim_bpf

Clang warns:

drivers/net/netdevsim/bpf.c:557:30: error: unused variable 'state'
[-Werror,-Wunused-variable]
        struct nsim_bpf_bound_prog *state;
                                    ^
1 error generated.

The declaration should have been removed in commit b07ade27e933 ("bpf:
pass translate() as a callback and remove its ndo_bpf subcommand").

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/netdevsim/bpf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 6a5b7bd9a1f96..a1b29173ca1ce 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -554,7 +554,6 @@ static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 {
 	struct netdevsim *ns = netdev_priv(dev);
-	struct nsim_bpf_bound_prog *state;
 	int err;
 
 	ASSERT_RTNL();

From cac6cc2f5ac710334ae0f6bba5630d791c253574 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 9 Nov 2018 10:54:00 -0800
Subject: [PATCH 32/71] bpf: Fix IPv6 dport byte order in bpf_sk_lookup_udp

Lookup functions in sk_lookup have different expectations about byte
order of provided arguments.

Specifically __inet_lookup, __udp4_lib_lookup and __udp6_lib_lookup
expect dport to be in network byte order and do ntohs(dport) internally.

At the same time __inet6_lookup expects dport to be in host byte order
and correspondingly names the argument hnum.

sk_lookup works correctly with __inet_lookup, __udp4_lib_lookup and
__inet6_lookup with regard to dport. But in __udp6_lib_lookup case it
uses host instead of expected network byte order. It makes result
returned by bpf_sk_lookup_udp for IPv6 incorrect.

The patch fixes byte order of dport passed to __udp6_lib_lookup.

Originally sk_lookup properly handled UDPv6, but not TCPv6. 5ef0ae84f02a
fixes TCPv6 but breaks UDPv6.

Fixes: 5ef0ae84f02a ("bpf: Fix IPv6 dport byte-order in bpf_sk_lookup")
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 53d50fb75ea18..f4ae933edf619 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4867,17 +4867,16 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 	} else {
 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
-		u16 hnum = ntohs(tuple->ipv6.dport);
 
 		if (proto == IPPROTO_TCP)
 			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
 					    src6, tuple->ipv6.sport,
-					    dst6, hnum,
+					    dst6, ntohs(tuple->ipv6.dport),
 					    dif, sdif, &refcounted);
 		else if (likely(ipv6_bpf_stub))
 			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
 							    src6, tuple->ipv6.sport,
-							    dst6, hnum,
+							    dst6, tuple->ipv6.dport,
 							    dif, sdif,
 							    &udp_table, NULL);
 #endif

From 6c49e65e0d462963b4fac97ebd87014342167027 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 9 Nov 2018 10:54:01 -0800
Subject: [PATCH 33/71] bpf: Support socket lookup in CGROUP_SOCK_ADDR progs

Make bpf_sk_lookup_tcp, bpf_sk_lookup_udp and bpf_sk_release helpers
available in programs of type BPF_PROG_TYPE_CGROUP_SOCK_ADDR.

Such programs operate on sockets and have access to socket and struct
sockaddr passed by user to system calls such as sys_bind, sys_connect,
sys_sendmsg.

It's useful to be able to lookup other sockets from these programs.
E.g. sys_connect may lookup IP:port endpoint and if there is a server
socket bound to that endpoint ("server" can be defined by saddr & sport
being zero), redirect client connection to it by rewriting IP:port in
sockaddr passed to sys_connect.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index f4ae933edf619..f6ca38a7d4332 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5042,6 +5042,43 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
 	.arg4_type      = ARG_ANYTHING,
 	.arg5_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+			       IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+	.func		= bpf_sock_addr_sk_lookup_tcp,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+			       IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+	.func		= bpf_sock_addr_sk_lookup_udp,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5148,6 +5185,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_cookie_sock_addr_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sock_addr_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sock_addr_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+#endif /* CONFIG_INET */
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 9108e3a023d3e4e77d94b589b07d397b0a790285 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 9 Nov 2018 10:54:02 -0800
Subject: [PATCH 34/71] selftest/bpf: Use bpf_sk_lookup_{tcp, udp} in
 test_sock_addr

Use bpf_sk_lookup_tcp, bpf_sk_lookup_udp and bpf_sk_release helpers from
test_sock_addr programs to make sure they're available and can lookup
and release socket properly for IPv4/IPv6, TCP/UDP.

Reading from a few fields of returned struct bpf_sock is also tested.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/connect4_prog.c | 43 ++++++++++++----
 tools/testing/selftests/bpf/connect6_prog.c | 56 ++++++++++++++++-----
 2 files changed, 78 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
index 5a88a681d2abc..b8395f3c43e96 100644
--- a/tools/testing/selftests/bpf/connect4_prog.c
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -21,23 +21,48 @@ int _version SEC("version") = 1;
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
+	struct bpf_sock_tuple tuple = {};
 	struct sockaddr_in sa;
+	struct bpf_sock *sk;
+
+	/* Verify that new destination is available. */
+	memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
+	memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
+
+	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
+	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+	else if (ctx->type == SOCK_STREAM)
+		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0);
+	else
+		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0);
+
+	if (!sk)
+		return 0;
+
+	if (sk->src_ip4 != tuple.ipv4.daddr ||
+	    sk->src_port != DST_REWRITE_PORT4) {
+		bpf_sk_release(sk);
+		return 0;
+	}
+
+	bpf_sk_release(sk);
 
 	/* Rewrite destination. */
 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
 
-	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
-		///* Rewrite source. */
-		memset(&sa, 0, sizeof(sa));
+	/* Rewrite source. */
+	memset(&sa, 0, sizeof(sa));
 
-		sa.sin_family = AF_INET;
-		sa.sin_port = bpf_htons(0);
-		sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+	sa.sin_family = AF_INET;
+	sa.sin_port = bpf_htons(0);
+	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
 
-		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
-			return 0;
-	}
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
 
 	return 1;
 }
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
index 8ea3f7d12deec..25f5dc7b7aa05 100644
--- a/tools/testing/selftests/bpf/connect6_prog.c
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -29,7 +29,41 @@ int _version SEC("version") = 1;
 SEC("cgroup/connect6")
 int connect_v6_prog(struct bpf_sock_addr *ctx)
 {
+	struct bpf_sock_tuple tuple = {};
 	struct sockaddr_in6 sa;
+	struct bpf_sock *sk;
+
+	/* Verify that new destination is available. */
+	memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr));
+	memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
+
+	tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
+	tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1);
+	tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2);
+	tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+	tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6);
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+	else if (ctx->type == SOCK_STREAM)
+		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0);
+	else
+		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0);
+
+	if (!sk)
+		return 0;
+
+	if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
+	    sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
+	    sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
+	    sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
+	    sk->src_port != DST_REWRITE_PORT6) {
+		bpf_sk_release(sk);
+		return 0;
+	}
+
+	bpf_sk_release(sk);
 
 	/* Rewrite destination. */
 	ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
@@ -39,21 +73,19 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
 
 	ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
 
-	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
-		/* Rewrite source. */
-		memset(&sa, 0, sizeof(sa));
+	/* Rewrite source. */
+	memset(&sa, 0, sizeof(sa));
 
-		sa.sin6_family = AF_INET6;
-		sa.sin6_port = bpf_htons(0);
+	sa.sin6_family = AF_INET6;
+	sa.sin6_port = bpf_htons(0);
 
-		sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
-		sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
-		sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
-		sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+	sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+	sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+	sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+	sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
 
-		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
-			return 0;
-	}
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
 
 	return 1;
 }

From 29a9c10e4110e368443f0b606d71557edee7f2cc Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 12 Nov 2018 13:44:10 -0800
Subject: [PATCH 35/71] bpftool: make libbfd optional

Make it possible to build bpftool without libbfd. libbfd and libopcodes are
typically provided in dev/dbg packages (binutils-dev in debian) which we
usually don't have installed on the fleet machines, and we'd like a way to
have a bpftool version that works without installing any additional packages.
This excludes support for disassembling jit-ted code and prints an error if
the user tries to use these features.

Tested by:
cat > FEATURES_DUMP.bpftool <<EOF
feature-libbfd=0
feature-disassembler-four-args=1
feature-reallocarray=0
feature-libelf=1
feature-libelf-mmap=1
feature-bpf=1
EOF
FEATURES_DUMP=$PWD/FEATURES_DUMP.bpftool make
ldd bpftool | grep libbfd

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Makefile     | 13 +++++++++++--
 tools/bpf/bpftool/jit_disasm.c |  8 +++++++-
 tools/bpf/bpftool/main.c       |  3 ---
 tools/bpf/bpftool/main.h       | 14 ++++++++++++++
 tools/bpf/bpftool/prog.c       |  3 +++
 5 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index dac7eff4c7e5d..1bea6b9790823 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -53,7 +53,7 @@ ifneq ($(EXTRA_LDFLAGS),)
 LDFLAGS += $(EXTRA_LDFLAGS)
 endif
 
-LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
+LIBS = -lelf $(LIBBPF)
 
 INSTALL ?= install
 RM ?= rm -f
@@ -90,7 +90,16 @@ include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
 
-SRCS = $(wildcard *.c)
+BFD_SRCS = jit_disasm.c
+
+SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c))
+
+ifeq ($(feature-libbfd),1)
+CFLAGS += -DHAVE_LIBBFD_SUPPORT
+SRCS += $(BFD_SRCS)
+LIBS += -lbfd -lopcodes
+endif
+
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index c75ffd9ce2bb3..b2ed5ee1af5fe 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -109,7 +109,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
 		if (inf) {
 			bfdf->arch_info = inf;
 		} else {
-			p_err("No libfd support for %s", arch);
+			p_err("No libbfd support for %s", arch);
 			return;
 		}
 	}
@@ -183,3 +183,9 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
 
 	bfd_close(bfdf);
 }
+
+int disasm_init(void)
+{
+	bfd_init();
+	return 0;
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 75a3296dc0bc8..5c4c1cd5a7ba2 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <bfd.h>
 #include <ctype.h>
 #include <errno.h>
 #include <getopt.h>
@@ -399,8 +398,6 @@ int main(int argc, char **argv)
 	if (argc < 0)
 		usage();
 
-	bfd_init();
-
 	ret = cmd_select(cmds, argc, argv, do_help);
 
 	if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 61d82020af58e..10c6c16fae29a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -147,8 +147,22 @@ int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
+#ifdef HAVE_LIBBFD_SUPPORT
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
 		       const char *arch, const char *disassembler_options);
+int disasm_init(void);
+#else
+static inline
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch, const char *disassembler_options)
+{
+}
+static inline int disasm_init(void)
+{
+	p_err("No libbfd support");
+	return -1;
+}
+#endif
 void print_data_json(uint8_t *data, size_t len);
 void print_hex_data_json(uint8_t *data, size_t len);
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5ff5544596e71..c176e1aa66fe1 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -467,6 +467,9 @@ static int do_dump(int argc, char **argv)
 	int fd;
 
 	if (is_prefix(*argv, "jited")) {
+		if (disasm_init())
+			return -1;
+
 		member_len = &info.jited_prog_len;
 		member_ptr = &info.jited_prog_insns;
 	} else if (is_prefix(*argv, "xlated")) {

From 592ee43faf860c1f2c0a4c11838db6fdb974bb78 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 13 Nov 2018 09:29:26 +0000
Subject: [PATCH 36/71] bpf: fix null pointer dereference on pointer offload

Pointer offload is being null checked however the following statement
dereferences the potentially null pointer offload when assigning
offload->dev_state.  Fix this by only assigning it if offload is not
null.

Detected by CoverityScan, CID#1475437 ("Dereference after null check")

Fixes: 00db12c3d141 ("bpf: call verifier_prep from its callback in struct bpf_offload_dev")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/offload.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 52c5617e37160..54cf2b9c44a45 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -130,9 +130,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
 
 	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;
-	if (offload)
+	if (offload) {
 		ret = offload->offdev->ops->prepare(prog);
-	offload->dev_state = !ret;
+		offload->dev_state = !ret;
+	}
 	up_read(&bpf_devs_lock);
 
 	return ret;

From 23499442c319412aa8e54e7a939e2eb531bdd77d Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 19 Nov 2018 14:49:01 -0800
Subject: [PATCH 37/71] bpf: libbpf: retry map creation without the name

Since commit 88cda1c9da02 ("bpf: libbpf: Provide basic API support
to specify BPF obj name"), libbpf unconditionally sets bpf_attr->name
for maps. Pre v4.14 kernels don't know about map names and return an
error about unexpected non-zero data. Retry sys_bpf without a map
name to cover older kernels.

v2 changes:
* check for errno == EINVAL as suggested by Daniel Borkmann

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 03f9bcc4ef501..961e1b9fc5927 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
 	__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, '\0', sizeof(attr));
 
@@ -86,7 +87,15 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 	attr.map_ifindex = create_attr->map_ifindex;
 	attr.inner_map_fd = create_attr->inner_map_fd;
 
-	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	if (ret < 0 && errno == EINVAL && create_attr->name) {
+		/* Retry the same syscall, but without the name.
+		 * Pre v4.14 kernels don't support map names.
+		 */
+		memset(attr.map_name, 0, sizeof(attr.map_name));
+		return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	}
+	return ret;
 }
 
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,

From 96b3b6c9091d23289721350e32c63cc8749686be Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 16 Nov 2018 11:41:08 +0000
Subject: [PATCH 38/71] bpf: allow zero-initializing hash map seed

Add a new flag BPF_F_ZERO_SEED, which forces a hash map
to initialize the seed to zero. This is useful when doing
performance analysis both on individual BPF programs, as
well as the kernel's hash table implementation.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |  3 +++
 kernel/bpf/hashtab.c     | 13 +++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 47d606d744cc6..8c01b89a4cb41 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -269,6 +269,9 @@ enum bpf_attach_type {
 /* Flag for stack_map, store build_id+offset instead of pointer */
 #define BPF_F_STACK_BUILD_ID	(1U << 5)
 
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED		(1U << 6)
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 2c17902881387..4b7c76765d9d6 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,7 +23,7 @@
 
 #define HTAB_CREATE_FLAG_MASK						\
 	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
-	 BPF_F_RDONLY | BPF_F_WRONLY)
+	 BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
 
 struct bucket {
 	struct hlist_nulls_head head;
@@ -244,6 +244,7 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	 */
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
 	int numa_node = bpf_map_attr_numa_node(attr);
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
@@ -257,6 +258,10 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 		 */
 		return -EPERM;
 
+	if (zero_seed && !capable(CAP_SYS_ADMIN))
+		/* Guard against local DoS, and discourage production use. */
+		return -EPERM;
+
 	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
 		/* reserved bits should not be used */
 		return -EINVAL;
@@ -373,7 +378,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_htab;
 
-	htab->hashrnd = get_random_int();
+	if (htab->map.map_flags & BPF_F_ZERO_SEED)
+		htab->hashrnd = 0;
+	else
+		htab->hashrnd = get_random_int();
+
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
 		raw_spin_lock_init(&htab->buckets[i].lock);

From 2f1833607aed6a9c1e1729bf0e2588c341ceb409 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 16 Nov 2018 11:41:09 +0000
Subject: [PATCH 39/71] bpf: move BPF_F_QUERY_EFFECTIVE after map flags

BPF_F_QUERY_EFFECTIVE is in the middle of the flags valid
for BPF_MAP_CREATE. Move it to its own section to reduce confusion.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8c01b89a4cb41..05d95290b8486 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -257,9 +257,6 @@ enum bpf_attach_type {
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE		(1U << 2)
 
-/* flags for BPF_PROG_QUERY */
-#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
-
 #define BPF_OBJ_NAME_LEN 16U
 
 /* Flags for accessing BPF object */
@@ -272,6 +269,9 @@ enum bpf_attach_type {
 /* Zero-initialize hash function seed. This should only be used for testing. */
 #define BPF_F_ZERO_SEED		(1U << 6)
 
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,

From 608114e441ad3a4fa1fced4d6d00653a34765eee Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 16 Nov 2018 11:41:10 +0000
Subject: [PATCH 40/71] tools: sync linux/bpf.h

Synchronize changes to linux/bpf.h from
* "bpf: allow zero-initializing hash map seed"
* "bpf: move BPF_F_QUERY_EFFECTIVE after map flags"

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a0..05d95290b8486 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -257,9 +257,6 @@ enum bpf_attach_type {
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE		(1U << 2)
 
-/* flags for BPF_PROG_QUERY */
-#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
-
 #define BPF_OBJ_NAME_LEN 16U
 
 /* Flags for accessing BPF object */
@@ -269,6 +266,12 @@ enum bpf_attach_type {
 /* Flag for stack_map, store build_id+offset instead of pointer */
 #define BPF_F_STACK_BUILD_ID	(1U << 5)
 
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED		(1U << 6)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -2201,6 +2204,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
  * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
  *	Description
@@ -2233,6 +2238,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description

From bf5d68c7304008ef838f1a2ca1aae8fab74d633d Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 16 Nov 2018 11:41:11 +0000
Subject: [PATCH 41/71] tools: add selftest for BPF_F_ZERO_SEED

Check that iterating two separate hash maps produces the same
order of keys if BPF_F_ZERO_SEED is used.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_maps.c | 64 +++++++++++++++++++++----
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 4db2116e52be4..9f0a5b16a2469 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -258,24 +258,36 @@ static void test_hashmap_percpu(int task, void *data)
 	close(fd);
 }
 
-static void test_hashmap_walk(int task, void *data)
+static int helper_fill_hashmap(int max_entries)
 {
-	int fd, i, max_entries = 1000;
-	long long key, value, next_key;
-	bool next_key_valid = true;
+	int i, fd, ret;
+	long long key, value;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    max_entries, map_flags);
-	if (fd < 0) {
-		printf("Failed to create hashmap '%s'!\n", strerror(errno));
-		exit(1);
-	}
+	CHECK(fd < 0,
+	      "failed to create hashmap",
+	      "err: %s, flags: 0x%x\n", strerror(errno), map_flags);
 
 	for (i = 0; i < max_entries; i++) {
 		key = i; value = key;
-		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+		ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+		CHECK(ret != 0,
+		      "can't update hashmap",
+		      "err: %s\n", strerror(ret));
 	}
 
+	return fd;
+}
+
+static void test_hashmap_walk(int task, void *data)
+{
+	int fd, i, max_entries = 1000;
+	long long key, value, next_key;
+	bool next_key_valid = true;
+
+	fd = helper_fill_hashmap(max_entries);
+
 	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
 					 &next_key) == 0; i++) {
 		key = next_key;
@@ -306,6 +318,39 @@ static void test_hashmap_walk(int task, void *data)
 	close(fd);
 }
 
+static void test_hashmap_zero_seed(void)
+{
+	int i, first, second, old_flags;
+	long long key, next_first, next_second;
+
+	old_flags = map_flags;
+	map_flags |= BPF_F_ZERO_SEED;
+
+	first = helper_fill_hashmap(3);
+	second = helper_fill_hashmap(3);
+
+	for (i = 0; ; i++) {
+		void *key_ptr = !i ? NULL : &key;
+
+		if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0)
+			break;
+
+		CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0,
+		      "next_key for second map must succeed",
+		      "key_ptr: %p", key_ptr);
+		CHECK(next_first != next_second,
+		      "keys must match",
+		      "i: %d first: %lld second: %lld\n", i,
+		      next_first, next_second);
+
+		key = next_first;
+	}
+
+	map_flags = old_flags;
+	close(first);
+	close(second);
+}
+
 static void test_arraymap(int task, void *data)
 {
 	int key, next_key, fd;
@@ -1534,6 +1579,7 @@ static void run_all_tests(void)
 	test_hashmap(0, NULL);
 	test_hashmap_percpu(0, NULL);
 	test_hashmap_walk(0, NULL);
+	test_hashmap_zero_seed();
 
 	test_arraymap(0, NULL);
 	test_arraymap_percpu(0, NULL);

From b47a0bd23e34022aa0d4b812fcebe85cb0c54d49 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Nov 2018 15:29:06 -0800
Subject: [PATCH 42/71] bpf: btf: Break up btf_type_is_void()

This patch breaks up btf_type_is_void() into
btf_type_is_void() and btf_type_is_fwd().

It also adds btf_type_nosize() to better describe that it
tests whether a type has no size info.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ee4c82667d659..2a50d87de485b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -306,15 +306,22 @@ static bool btf_type_is_modifier(const struct btf_type *t)
 
 static bool btf_type_is_void(const struct btf_type *t)
 {
-	/* void => no type and size info.
-	 * Hence, FWD is also treated as void.
-	 */
-	return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+	return t == &btf_void;
+}
+
+static bool btf_type_is_fwd(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_nosize(const struct btf_type *t)
+{
+	return btf_type_is_void(t) || btf_type_is_fwd(t);
 }
 
-static bool btf_type_is_void_or_null(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
 {
-	return !t || btf_type_is_void(t);
+	return !t || btf_type_nosize(t);
 }
 
 /* union is only a special case of struct:
@@ -826,7 +833,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 	u32 size = 0;
 
 	size_type = btf_type_by_id(btf, size_type_id);
-	if (btf_type_is_void_or_null(size_type))
+	if (btf_type_nosize_or_null(size_type))
 		return NULL;
 
 	if (btf_type_has_size(size_type)) {
@@ -842,7 +849,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 		size = btf->resolved_sizes[size_type_id];
 		size_type_id = btf->resolved_ids[size_type_id];
 		size_type = btf_type_by_id(btf, size_type_id);
-		if (btf_type_is_void(size_type))
+		if (btf_type_nosize_or_null(size_type))
 			return NULL;
 	}
 
@@ -1164,7 +1171,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 	}
 
 	/* "typedef void new_void", "const void"...etc */
-	if (btf_type_is_void(next_type))
+	if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
 		goto resolved;
 
 	if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1178,7 +1185,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 	 * pretty print).
 	 */
 	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
-	    !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+	    !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) {
 		btf_verifier_log_type(env, v->t, "Invalid type_id");
 		return -EINVAL;
 	}
@@ -1205,7 +1212,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
 	}
 
 	/* "void *" */
-	if (btf_type_is_void(next_type))
+	if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
 		goto resolved;
 
 	if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1235,7 +1242,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
 	}
 
 	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
-	    !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+	    !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) {
 		btf_verifier_log_type(env, v->t, "Invalid type_id");
 		return -EINVAL;
 	}
@@ -1396,7 +1403,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
 	/* Check array->index_type */
 	index_type_id = array->index_type;
 	index_type = btf_type_by_id(btf, index_type_id);
-	if (btf_type_is_void_or_null(index_type)) {
+	if (btf_type_nosize_or_null(index_type)) {
 		btf_verifier_log_type(env, v->t, "Invalid index");
 		return -EINVAL;
 	}
@@ -1415,7 +1422,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
 	/* Check array->type */
 	elem_type_id = array->type;
 	elem_type = btf_type_by_id(btf, elem_type_id);
-	if (btf_type_is_void_or_null(elem_type)) {
+	if (btf_type_nosize_or_null(elem_type)) {
 		btf_verifier_log_type(env, v->t,
 				      "Invalid elem");
 		return -EINVAL;
@@ -1615,7 +1622,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
 		const struct btf_type *member_type = btf_type_by_id(env->btf,
 								member_type_id);
 
-		if (btf_type_is_void_or_null(member_type)) {
+		if (btf_type_nosize_or_null(member_type)) {
 			btf_verifier_log_member(env, v->t, member,
 						"Invalid member");
 			return -EINVAL;

From 2667a2626f4da370409c2830552f6e8c8b8c41e2 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Nov 2018 15:29:08 -0800
Subject: [PATCH 43/71] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
to support the function debug info.

BTF_KIND_FUNC_PROTO must not have a name (i.e. !t->name_off)
and it is followed by >= 0 'struct btf_param' objects to
describe the function arguments.

The BTF_KIND_FUNC must have a valid name and it must
refer back to a BTF_KIND_FUNC_PROTO.

The above is the conclusion after the discussion between
Edward Cree, Alexei, Daniel, Yonghong and Martin.

By combining BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO,
a complete function signature can be obtained.  It will be
used in the later patches to learn the function signature of
a running bpf program.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/btf.h |  18 +-
 kernel/bpf/btf.c         | 389 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 354 insertions(+), 53 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 972265f328717..14f66948fc95f 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
 	/* "size" is used by INT, ENUM, STRUCT and UNION.
 	 * "size" tells the size of the type it is describing.
 	 *
-	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+	 * FUNC and FUNC_PROTO.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE	9	/* Volatile	*/
 #define BTF_KIND_CONST		10	/* Const	*/
 #define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_MAX		11
-#define NR_BTF_KINDS		12
+#define BTF_KIND_FUNC		12	/* Function	*/
+#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
+#define BTF_KIND_MAX		13
+#define NR_BTF_KINDS		14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
@@ -110,4 +113,13 @@ struct btf_member {
 	__u32	offset;	/* offset in bits */
 };
 
+/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
+ * The exact number of btf_param is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_param {
+	__u32	name_off;
+	__u32	type;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2a50d87de485b..6a2be79b73fc7 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5,6 +5,7 @@
 #include <uapi/linux/types.h>
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
@@ -259,6 +260,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_VOLATILE]	= "VOLATILE",
 	[BTF_KIND_CONST]	= "CONST",
 	[BTF_KIND_RESTRICT]	= "RESTRICT",
+	[BTF_KIND_FUNC]		= "FUNC",
+	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 };
 
 struct btf_kind_operations {
@@ -281,6 +284,9 @@ struct btf_kind_operations {
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
 static struct btf_type btf_void;
 
+static int btf_resolve(struct btf_verifier_env *env,
+		       const struct btf_type *t, u32 type_id);
+
 static bool btf_type_is_modifier(const struct btf_type *t)
 {
 	/* Some of them is not strictly a C modifier
@@ -314,9 +320,20 @@ static bool btf_type_is_fwd(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
+static bool btf_type_is_func(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
+static bool btf_type_is_func_proto(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
+}
+
 static bool btf_type_nosize(const struct btf_type *t)
 {
-	return btf_type_is_void(t) || btf_type_is_fwd(t);
+	return btf_type_is_void(t) || btf_type_is_fwd(t) ||
+	       btf_type_is_func(t) || btf_type_is_func_proto(t);
 }
 
 static bool btf_type_nosize_or_null(const struct btf_type *t)
@@ -433,6 +450,30 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 		offset < btf->hdr.str_len;
 }
 
+/* Only C-style identifier is permitted. This can be relaxed if
+ * necessary.
+ */
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+{
+	/* offset must be valid */
+	const char *src = &btf->strings[offset];
+	const char *src_limit;
+
+	if (!isalpha(*src) && *src != '_')
+		return false;
+
+	/* set a limit on identifier length */
+	src_limit = src + KSYM_NAME_LEN;
+	src++;
+	while (*src && src < src_limit) {
+		if (!isalnum(*src) && *src != '_')
+			return false;
+		src++;
+	}
+
+	return !*src;
+}
+
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
 	if (!offset)
@@ -747,11 +788,15 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
 		/* int, enum or void is a sink */
 		return !btf_type_needs_resolve(next_type);
 	case RESOLVE_PTR:
-		/* int, enum, void, struct or array is a sink for ptr */
+		/* int, enum, void, struct, array, func or func_proto is a sink
+		 * for ptr
+		 */
 		return !btf_type_is_modifier(next_type) &&
 			!btf_type_is_ptr(next_type);
 	case RESOLVE_STRUCT_OR_ARRAY:
-		/* int, enum, void or ptr is a sink for struct and array */
+		/* int, enum, void, ptr, func or func_proto is a sink
+		 * for struct and array
+		 */
 		return !btf_type_is_modifier(next_type) &&
 			!btf_type_is_array(next_type) &&
 			!btf_type_is_struct(next_type);
@@ -1170,10 +1215,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	/* "typedef void new_void", "const void"...etc */
-	if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
-		goto resolved;
-
 	if (!env_type_is_resolve_sink(env, next_type) &&
 	    !env_type_is_resolved(env, next_type_id))
 		return env_stack_push(env, next_type, next_type_id);
@@ -1184,13 +1225,18 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 	 * save us a few type-following when we use it later (e.g. in
 	 * pretty print).
 	 */
-	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
-	    !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) {
-		btf_verifier_log_type(env, v->t, "Invalid type_id");
-		return -EINVAL;
+	if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+		if (env_type_is_resolved(env, next_type_id))
+			next_type = btf_type_id_resolve(btf, &next_type_id);
+
+		/* "typedef void new_void", "const void"...etc */
+		if (!btf_type_is_void(next_type) &&
+		    !btf_type_is_fwd(next_type)) {
+			btf_verifier_log_type(env, v->t, "Invalid type_id");
+			return -EINVAL;
+		}
 	}
 
-resolved:
 	env_stack_pop_resolved(env, next_type_id, next_type_size);
 
 	return 0;
@@ -1203,7 +1249,6 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
 	const struct btf_type *t = v->t;
 	u32 next_type_id = t->type;
 	struct btf *btf = env->btf;
-	u32 next_type_size = 0;
 
 	next_type = btf_type_by_id(btf, next_type_id);
 	if (!next_type) {
@@ -1211,10 +1256,6 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	/* "void *" */
-	if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
-		goto resolved;
-
 	if (!env_type_is_resolve_sink(env, next_type) &&
 	    !env_type_is_resolved(env, next_type_id))
 		return env_stack_push(env, next_type, next_type_id);
@@ -1241,13 +1282,18 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
 					      resolved_type_id);
 	}
 
-	if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
-	    !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) {
-		btf_verifier_log_type(env, v->t, "Invalid type_id");
-		return -EINVAL;
+	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
+		if (env_type_is_resolved(env, next_type_id))
+			next_type = btf_type_id_resolve(btf, &next_type_id);
+
+		if (!btf_type_is_void(next_type) &&
+		    !btf_type_is_fwd(next_type) &&
+		    !btf_type_is_func_proto(next_type)) {
+			btf_verifier_log_type(env, v->t, "Invalid type_id");
+			return -EINVAL;
+		}
 	}
 
-resolved:
 	env_stack_pop_resolved(env, next_type_id, 0);
 
 	return 0;
@@ -1787,6 +1833,232 @@ static struct btf_kind_operations enum_ops = {
 	.seq_show = btf_enum_seq_show,
 };
 
+static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
+				     const struct btf_type *t,
+				     u32 meta_left)
+{
+	u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param);
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (t->name_off) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return meta_needed;
+}
+
+static void btf_func_proto_log(struct btf_verifier_env *env,
+			       const struct btf_type *t)
+{
+	const struct btf_param *args = (const struct btf_param *)(t + 1);
+	u16 nr_args = btf_type_vlen(t), i;
+
+	btf_verifier_log(env, "return=%u args=(", t->type);
+	if (!nr_args) {
+		btf_verifier_log(env, "void");
+		goto done;
+	}
+
+	if (nr_args == 1 && !args[0].type) {
+		/* Only one vararg */
+		btf_verifier_log(env, "vararg");
+		goto done;
+	}
+
+	btf_verifier_log(env, "%u %s", args[0].type,
+			 btf_name_by_offset(env->btf,
+					    args[0].name_off));
+	for (i = 1; i < nr_args - 1; i++)
+		btf_verifier_log(env, ", %u %s", args[i].type,
+				 btf_name_by_offset(env->btf,
+						    args[i].name_off));
+
+	if (nr_args > 1) {
+		const struct btf_param *last_arg = &args[nr_args - 1];
+
+		if (last_arg->type)
+			btf_verifier_log(env, ", %u %s", last_arg->type,
+					 btf_name_by_offset(env->btf,
+							    last_arg->name_off));
+		else
+			btf_verifier_log(env, ", vararg");
+	}
+
+done:
+	btf_verifier_log(env, ")");
+}
+
+static struct btf_kind_operations func_proto_ops = {
+	.check_meta = btf_func_proto_check_meta,
+	.resolve = btf_df_resolve,
+	/*
+	 * BTF_KIND_FUNC_PROTO cannot be directly referred by
+	 * a struct's member.
+	 *
+	 * It should be a funciton pointer instead.
+	 * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
+	 *
+	 * Hence, there is no btf_func_check_member().
+	 */
+	.check_member = btf_df_check_member,
+	.log_details = btf_func_proto_log,
+	.seq_show = btf_df_seq_show,
+};
+
+static s32 btf_func_check_meta(struct btf_verifier_env *env,
+			       const struct btf_type *t,
+			       u32 meta_left)
+{
+	if (!t->name_off ||
+	    !btf_name_valid_identifier(env->btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return 0;
+}
+
+static struct btf_kind_operations func_ops = {
+	.check_meta = btf_func_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_df_check_member,
+	.log_details = btf_ref_type_log,
+	.seq_show = btf_df_seq_show,
+};
+
+static int btf_func_proto_check(struct btf_verifier_env *env,
+				const struct btf_type *t)
+{
+	const struct btf_type *ret_type;
+	const struct btf_param *args;
+	const struct btf *btf;
+	u16 nr_args, i;
+	int err;
+
+	btf = env->btf;
+	args = (const struct btf_param *)(t + 1);
+	nr_args = btf_type_vlen(t);
+
+	/* Check func return type which could be "void" (t->type == 0) */
+	if (t->type) {
+		u32 ret_type_id = t->type;
+
+		ret_type = btf_type_by_id(btf, ret_type_id);
+		if (!ret_type) {
+			btf_verifier_log_type(env, t, "Invalid return type");
+			return -EINVAL;
+		}
+
+		if (btf_type_needs_resolve(ret_type) &&
+		    !env_type_is_resolved(env, ret_type_id)) {
+			err = btf_resolve(env, ret_type, ret_type_id);
+			if (err)
+				return err;
+		}
+
+		/* Ensure the return type is a type that has a size */
+		if (!btf_type_id_size(btf, &ret_type_id, NULL)) {
+			btf_verifier_log_type(env, t, "Invalid return type");
+			return -EINVAL;
+		}
+	}
+
+	if (!nr_args)
+		return 0;
+
+	/* Last func arg type_id could be 0 if it is a vararg */
+	if (!args[nr_args - 1].type) {
+		if (args[nr_args - 1].name_off) {
+			btf_verifier_log_type(env, t, "Invalid arg#%u",
+					      nr_args);
+			return -EINVAL;
+		}
+		nr_args--;
+	}
+
+	err = 0;
+	for (i = 0; i < nr_args; i++) {
+		const struct btf_type *arg_type;
+		u32 arg_type_id;
+
+		arg_type_id = args[i].type;
+		arg_type = btf_type_by_id(btf, arg_type_id);
+		if (!arg_type) {
+			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
+			err = -EINVAL;
+			break;
+		}
+
+		if (args[i].name_off &&
+		    (!btf_name_offset_valid(btf, args[i].name_off) ||
+		     !btf_name_valid_identifier(btf, args[i].name_off))) {
+			btf_verifier_log_type(env, t,
+					      "Invalid arg#%u", i + 1);
+			err = -EINVAL;
+			break;
+		}
+
+		if (btf_type_needs_resolve(arg_type) &&
+		    !env_type_is_resolved(env, arg_type_id)) {
+			err = btf_resolve(env, arg_type, arg_type_id);
+			if (err)
+				break;
+		}
+
+		if (!btf_type_id_size(btf, &arg_type_id, NULL)) {
+			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int btf_func_check(struct btf_verifier_env *env,
+			  const struct btf_type *t)
+{
+	const struct btf_type *proto_type;
+	const struct btf_param *args;
+	const struct btf *btf;
+	u16 nr_args, i;
+
+	btf = env->btf;
+	proto_type = btf_type_by_id(btf, t->type);
+
+	if (!proto_type || !btf_type_is_func_proto(proto_type)) {
+		btf_verifier_log_type(env, t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	args = (const struct btf_param *)(proto_type + 1);
+	nr_args = btf_type_vlen(proto_type);
+	for (i = 0; i < nr_args; i++) {
+		if (!args[i].name_off && args[i].type) {
+			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
 	[BTF_KIND_INT] = &int_ops,
 	[BTF_KIND_PTR] = &ptr_ops,
@@ -1799,6 +2071,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
 	[BTF_KIND_VOLATILE] = &modifier_ops,
 	[BTF_KIND_CONST] = &modifier_ops,
 	[BTF_KIND_RESTRICT] = &modifier_ops,
+	[BTF_KIND_FUNC] = &func_ops,
+	[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
 };
 
 static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -1870,30 +2144,6 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
 	return 0;
 }
 
-static int btf_resolve(struct btf_verifier_env *env,
-		       const struct btf_type *t, u32 type_id)
-{
-	const struct resolve_vertex *v;
-	int err = 0;
-
-	env->resolve_mode = RESOLVE_TBD;
-	env_stack_push(env, t, type_id);
-	while (!err && (v = env_stack_peak(env))) {
-		env->log_type_id = v->type_id;
-		err = btf_type_ops(v->t)->resolve(env, v);
-	}
-
-	env->log_type_id = type_id;
-	if (err == -E2BIG)
-		btf_verifier_log_type(env, t,
-				      "Exceeded max resolving depth:%u",
-				      MAX_RESOLVE_DEPTH);
-	else if (err == -EEXIST)
-		btf_verifier_log_type(env, t, "Loop detected");
-
-	return err;
-}
-
 static bool btf_resolve_valid(struct btf_verifier_env *env,
 			      const struct btf_type *t,
 			      u32 type_id)
@@ -1927,6 +2177,39 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
 	return false;
 }
 
+static int btf_resolve(struct btf_verifier_env *env,
+		       const struct btf_type *t, u32 type_id)
+{
+	u32 save_log_type_id = env->log_type_id;
+	const struct resolve_vertex *v;
+	int err = 0;
+
+	env->resolve_mode = RESOLVE_TBD;
+	env_stack_push(env, t, type_id);
+	while (!err && (v = env_stack_peak(env))) {
+		env->log_type_id = v->type_id;
+		err = btf_type_ops(v->t)->resolve(env, v);
+	}
+
+	env->log_type_id = type_id;
+	if (err == -E2BIG) {
+		btf_verifier_log_type(env, t,
+				      "Exceeded max resolving depth:%u",
+				      MAX_RESOLVE_DEPTH);
+	} else if (err == -EEXIST) {
+		btf_verifier_log_type(env, t, "Loop detected");
+	}
+
+	/* Final sanity check */
+	if (!err && !btf_resolve_valid(env, t, type_id)) {
+		btf_verifier_log_type(env, t, "Invalid resolve state");
+		err = -EINVAL;
+	}
+
+	env->log_type_id = save_log_type_id;
+	return err;
+}
+
 static int btf_check_all_types(struct btf_verifier_env *env)
 {
 	struct btf *btf = env->btf;
@@ -1949,10 +2232,16 @@ static int btf_check_all_types(struct btf_verifier_env *env)
 				return err;
 		}
 
-		if (btf_type_needs_resolve(t) &&
-		    !btf_resolve_valid(env, t, type_id)) {
-			btf_verifier_log_type(env, t, "Invalid resolve state");
-			return -EINVAL;
+		if (btf_type_is_func_proto(t)) {
+			err = btf_func_proto_check(env, t);
+			if (err)
+				return err;
+		}
+
+		if (btf_type_is_func(t)) {
+			err = btf_func_check(env, t);
+			if (err)
+				return err;
 		}
 	}
 

From 781e775e296ce3aabe0a4a0f773dccda02267695 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Nov 2018 15:29:09 -0800
Subject: [PATCH 44/71] tools/bpf: Sync kernel btf.h header

The kernel uapi btf.h is synced to the tools directory.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/btf.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 972265f328717..14f66948fc95f 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
 	/* "size" is used by INT, ENUM, STRUCT and UNION.
 	 * "size" tells the size of the type it is describing.
 	 *
-	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+	 * FUNC and FUNC_PROTO.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE	9	/* Volatile	*/
 #define BTF_KIND_CONST		10	/* Const	*/
 #define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_MAX		11
-#define NR_BTF_KINDS		12
+#define BTF_KIND_FUNC		12	/* Function	*/
+#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
+#define BTF_KIND_MAX		13
+#define NR_BTF_KINDS		14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
@@ -110,4 +113,13 @@ struct btf_member {
 	__u32	offset;	/* offset in bits */
 };
 
+/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
+ * The exact number of btf_param is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_param {
+	__u32	name_off;
+	__u32	type;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */

From 78a2540e8945678b390a5f41eb82459bc6f0f36c Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Nov 2018 15:29:10 -0800
Subject: [PATCH 45/71] tools/bpf: Add tests for BTF_KIND_FUNC_PROTO and
 BTF_KIND_FUNC

This patch adds unit tests for BTF_KIND_FUNC_PROTO and
BTF_KIND_FUNC to test_btf.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c                    |   4 +
 tools/testing/selftests/bpf/test_btf.c | 474 ++++++++++++++++++++++++-
 2 files changed, 476 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 449591aa99000..31225e64766fd 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -165,6 +165,10 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 		case BTF_KIND_ENUM:
 			next_type += vlen * sizeof(struct btf_enum);
 			break;
+		case BTF_KIND_FUNC_PROTO:
+			next_type += vlen * sizeof(struct btf_param);
+			break;
+		case BTF_KIND_FUNC:
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_PTR:
 		case BTF_KIND_FWD:
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index f42b3396d6226..e0eeee5c8c049 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -85,8 +85,20 @@ static int __base_pr(const char *format, ...)
 #define BTF_TYPEDEF_ENC(name, type) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
 
-#define BTF_PTR_ENC(name, type) \
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+#define BTF_PTR_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_CONST_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type)
+
+#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type)
+
+#define BTF_FUNC_PROTO_ARG_ENC(name, type) \
+	(name), (type)
+
+#define BTF_FUNC_ENC(name, func_proto) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
 
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
@@ -1374,6 +1386,464 @@ static struct btf_raw_test raw_tests[] = {
 	.map_create_err = true,
 },
 
+{
+	.descr = "func proto (int (*)(int, unsigned int))",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* int (*)(int, unsigned int) */
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func proto (vararg)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int, unsigned int, ...) */
+		BTF_FUNC_PROTO_ENC(0, 3),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+			BTF_FUNC_PROTO_ARG_ENC(0, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func proto (vararg with name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int b, ... c) */
+		BTF_FUNC_PROTO_ENC(0, 3),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b\0c",
+	.str_sec_size = sizeof("\0a\0b\0c"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#3",
+},
+
+{
+	.descr = "func proto (arg after vararg)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, ..., unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 3),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 0),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b",
+	.str_sec_size = sizeof("\0a\0b"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#2",
+},
+
+{
+	.descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* typedef void (*func_ptr)(int, unsigned int) */
+		BTF_TYPEDEF_ENC(NAME_TBD, 5),			/* [3] */
+		/* const func_ptr */
+		BTF_CONST_ENC(3),				/* [4] */
+		BTF_PTR_ENC(6),					/* [5] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [6] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0func_ptr",
+	.str_sec_size = sizeof("\0func_ptr"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 5),			/* [4] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [5] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0func_typedef",
+	.str_sec_size = sizeof("\0func_typedef"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+
+{
+	.descr = "func proto (btf_resolve(arg))",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* void (*)(const void *) */
+		BTF_FUNC_PROTO_ENC(0, 1),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 3),
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_PTR_ENC(0),					/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func proto (Not all arg has name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int, unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0b",
+	.str_sec_size = sizeof("\0b"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func proto (Bad arg name_off)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int <bad_name_off>) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a",
+	.str_sec_size = sizeof("\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#2",
+},
+
+{
+	.descr = "func proto (Bad arg name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int !!!) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0!!!",
+	.str_sec_size = sizeof("\0a\0!!!"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#2",
+},
+
+{
+	.descr = "func proto (Invalid return type)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* <bad_ret_type> (*)(int, unsigned int) */
+		BTF_FUNC_PROTO_ENC(100, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid return type",
+},
+
+{
+	.descr = "func proto (with func name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void func_proto(int, unsigned int) */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0),	/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0func_proto",
+	.str_sec_size = sizeof("\0func_proto"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "func proto (const void arg)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(const void) */
+		BTF_FUNC_PROTO_ENC(0, 1),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(0, 4),
+		BTF_CONST_ENC(0),				/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_proto_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#1",
+},
+
+{
+	.descr = "func (void func(int a, unsigned int b))",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		/* void func(int a, unsigned int b) */
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b\0func",
+	.str_sec_size = sizeof("\0a\0b\0func"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "func (No func name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		/* void <no_name>(int a, unsigned int b) */
+		BTF_FUNC_ENC(0, 3),				/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b",
+	.str_sec_size = sizeof("\0a\0b"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "func (Invalid func name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		/* void !!!(int a, unsigned int b) */
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b\0!!!",
+	.str_sec_size = sizeof("\0a\0b\0!!!"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "func (Some arg has no name)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(0, 2),
+		/* void func(int a, unsigned int) */
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0func",
+	.str_sec_size = sizeof("\0a\0func"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid arg#2",
+},
+
+{
+	.descr = "func (Non zero vlen)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
+		/* void (*)(int a, unsigned int b) */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		/* void func(int a, unsigned int b) */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), 	/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b\0func",
+	.str_sec_size = sizeof("\0a\0b\0func"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "vlen != 0",
+},
+
+{
+	.descr = "func (Not referring to FUNC_PROTO)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0func",
+	.str_sec_size = sizeof("\0func"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "func_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+
 }; /* struct btf_raw_test raw_tests[] */
 
 static const char *get_next_str(const char *start, const char *end)

From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:11 -0800
Subject: [PATCH 46/71] bpf: Introduce bpf_func_info

This patch adds an interface to load a program with the following
additional information:
   . prog_btf_fd
   . func_info, func_info_rec_size and func_info_cnt
where func_info provides the function range and type_id
corresponding to each function.

The func_info_rec_size field is introduced in the UAPI to specify
the size of struct bpf_func_info passed from user space. This is
intended to make the bpf_func_info structure growable in the future.
If the kernel gets a different bpf_func_info size from userspace,
it will try to handle the user request with the part of bpf_func_info
it can understand. In this patch, the kernel can understand
  struct bpf_func_info {
       __u32   insn_offset;
       __u32   type_id;
  };
If the user passes a bpf_func_info record size of 16 bytes, the
kernel can still handle the part of each record covered by the above definition.

If the verifier agrees with the function ranges provided by the user,
the bpf_prog ksym for each function will use the func name
given by the type_id. This provides a better encoding, as it is
not subject to the 16-byte program name limit, which is
particularly useful for bpf programs that contain
multiple subprograms.

The bpf_prog_info interface is also extended to
return btf_id, func_info, func_info_rec_size and func_info_cnt
to userspace, so userspace can print out the function prototype
for each xlated function. The insn_offset in the returned
func_info corresponds to the insn offset for xlated functions.
With other jit related fields in bpf_prog_info, userspace can also
print out function prototypes for each jited function.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          |   5 +-
 include/linux/bpf_verifier.h |   1 +
 include/linux/btf.h          |   2 +
 include/uapi/linux/bpf.h     |  13 ++++
 kernel/bpf/btf.c             |   4 +-
 kernel/bpf/core.c            |  13 ++++
 kernel/bpf/syscall.c         |  59 +++++++++++++++--
 kernel/bpf/verifier.c        | 120 ++++++++++++++++++++++++++++++++++-
 8 files changed, 209 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 987815152629a..7f0e225bf630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -316,6 +316,8 @@ struct bpf_prog_aux {
 	void *security;
 #endif
 	struct bpf_prog_offload *offload;
+	struct btf *btf;
+	u32 type_id; /* type id for this prog/func */
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
@@ -527,7 +529,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 }
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
+	      union bpf_attr __user *uattr);
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
 /* Map specifics */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 11f5df1092d9b..204382f46fd88 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -204,6 +204,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 struct bpf_subprog_info {
 	u32 start; /* insn idx of function entry point */
 	u16 stack_depth; /* max. stack depth used by this function */
+	u32 type_id; /* btf type_id for this subprog */
 };
 
 /* single container for all structs
diff --git a/include/linux/btf.h b/include/linux/btf.h
index e076c4697049d..7f2c0a4a45ea6 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
+const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
+const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 05d95290b8486..c1554aa074659 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -338,6 +338,10 @@ union bpf_attr {
 		 * (context accesses, allowed helpers, etc).
 		 */
 		__u32		expected_attach_type;
+		__u32		prog_btf_fd;	/* fd pointing to BTF type data */
+		__u32		func_info_rec_size;	/* userspace bpf_func_info size */
+		__aligned_u64	func_info;	/* func info */
+		__u32		func_info_cnt;	/* number of bpf_func_info records */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2638,6 +2642,10 @@ struct bpf_prog_info {
 	__u32 nr_jited_func_lens;
 	__aligned_u64 jited_ksyms;
 	__aligned_u64 jited_func_lens;
+	__u32 btf_id;
+	__u32 func_info_rec_size;
+	__aligned_u64 func_info;
+	__u32 func_info_cnt;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2949,4 +2957,9 @@ struct bpf_flow_keys {
 	};
 };
 
+struct bpf_func_info {
+	__u32	insn_offset;
+	__u32	type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6a2be79b73fc7..69da9169819ad 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -474,7 +474,7 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
 	return !*src;
 }
 
-static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
+const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
 	if (!offset)
 		return "(anon)";
@@ -484,7 +484,7 @@ static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 		return "(invalid-name-offset)";
 }
 
-static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
+const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 {
 	if (type_id > btf->nr_types)
 		return NULL;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a796e0799ec4..16d77012ad3ef 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -21,12 +21,14 @@
  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  */
 
+#include <uapi/linux/btf.h>
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/random.h>
 #include <linux/moduleloader.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/frame.h>
 #include <linux/rbtree_latch.h>
 #include <linux/kallsyms.h>
@@ -390,6 +392,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
 static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 {
 	const char *end = sym + KSYM_NAME_LEN;
+	const struct btf_type *type;
+	const char *func_name;
 
 	BUILD_BUG_ON(sizeof("bpf_prog_") +
 		     sizeof(prog->tag) * 2 +
@@ -404,6 +408,15 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 
 	sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
 	sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
+
+	/* prog->aux->name will be ignored if full btf name is available */
+	if (prog->aux->btf) {
+		type = btf_type_by_id(prog->aux->btf, prog->aux->type_id);
+		func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
+		snprintf(sym, (size_t)(end - sym), "_%s", func_name);
+		return;
+	}
+
 	if (prog->aux->name[0])
 		snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
 	else
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cf5040fd54344..9983778081021 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1213,6 +1213,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
 		bpf_prog_kallsyms_del_all(prog);
+		btf_put(prog->aux->btf);
 
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -1437,9 +1438,9 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD expected_attach_type
+#define	BPF_PROG_LOAD_LAST_FIELD func_info_cnt
 
-static int bpf_prog_load(union bpf_attr *attr)
+static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
 	enum bpf_prog_type type = attr->prog_type;
 	struct bpf_prog *prog;
@@ -1525,7 +1526,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 		goto free_prog;
 
 	/* run eBPF verifier */
-	err = bpf_check(&prog, attr);
+	err = bpf_check(&prog, attr, uattr);
 	if (err < 0)
 		goto free_used_maps;
 
@@ -2079,6 +2080,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
 		info.nr_jited_func_lens = 0;
+		info.func_info_cnt = 0;
 		goto done;
 	}
 
@@ -2216,6 +2218,55 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		}
 	}
 
+	if (prog->aux->btf) {
+		u32 ucnt, urec_size;
+
+		info.btf_id = btf_id(prog->aux->btf);
+
+		ucnt = info.func_info_cnt;
+		info.func_info_cnt = prog->aux->func_cnt ? : 1;
+		urec_size = info.func_info_rec_size;
+		info.func_info_rec_size = sizeof(struct bpf_func_info);
+		if (ucnt) {
+			/* expect passed-in urec_size is what the kernel expects */
+			if (urec_size != info.func_info_rec_size)
+				return -EINVAL;
+
+			if (bpf_dump_raw_ok()) {
+				struct bpf_func_info kern_finfo;
+				char __user *user_finfo;
+				u32 i, insn_offset;
+
+				user_finfo = u64_to_user_ptr(info.func_info);
+				if (prog->aux->func_cnt) {
+					ucnt = min_t(u32, info.func_info_cnt, ucnt);
+					insn_offset = 0;
+					for (i = 0; i < ucnt; i++) {
+						kern_finfo.insn_offset = insn_offset;
+						kern_finfo.type_id = prog->aux->func[i]->aux->type_id;
+						if (copy_to_user(user_finfo, &kern_finfo,
+								 sizeof(kern_finfo)))
+							return -EFAULT;
+
+						/* func[i]->len holds the prog len */
+						insn_offset += prog->aux->func[i]->len;
+						user_finfo += urec_size;
+					}
+				} else {
+					kern_finfo.insn_offset = 0;
+					kern_finfo.type_id = prog->aux->type_id;
+					if (copy_to_user(user_finfo, &kern_finfo,
+							 sizeof(kern_finfo)))
+						return -EFAULT;
+				}
+			} else {
+				info.func_info_cnt = 0;
+			}
+		}
+	} else {
+		info.func_info_cnt = 0;
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
@@ -2501,7 +2552,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = map_get_next_key(&attr);
 		break;
 	case BPF_PROG_LOAD:
-		err = bpf_prog_load(&attr);
+		err = bpf_prog_load(&attr, uattr);
 		break;
 	case BPF_OBJ_PIN:
 		err = bpf_obj_pin(&attr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b5222aa61d543..f102c4fd0c5af 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11,10 +11,12 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * General Public License for more details.
  */
+#include <uapi/linux/btf.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/filter.h>
 #include <net/netlink.h>
@@ -4639,6 +4641,132 @@ static int check_cfg(struct bpf_verifier_env *env)
 	return ret;
 }
 
+/* The minimum supported BTF func info size */
+#define MIN_BPF_FUNCINFO_SIZE	8
+/* The maximum func info record size accepted from user space */
+#define MAX_FUNCINFO_REC_SIZE	252
+
+/* Validate the func_info records supplied with BPF_PROG_LOAD.
+ *
+ * Nothing is checked when attr->func_info_cnt is 0.  Otherwise the record
+ * count must equal env->subprog_cnt and func_info_rec_size must be a
+ * multiple of sizeof(u32) within [MIN_BPF_FUNCINFO_SIZE, MAX_FUNCINFO_REC_SIZE].
+ * Each record is then copied from user space and checked: the first
+ * insn_offset must be 0, later ones must be strictly increasing, each
+ * must equal the start of the matching subprog, and type_id must refer
+ * to a BTF_KIND_FUNC type in the BTF behind attr->prog_btf_fd.
+ *
+ * On success the BTF reference is stored in prog->aux->btf, the first
+ * record's type_id in prog->aux->type_id and every type_id in
+ * env->subprog_info[].  A failure after the BTF lookup drops that reference.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
+			  union bpf_attr *attr, union bpf_attr __user *uattr)
+{
+	u32 i, nfuncs, urec_size, min_size, prev_offset = 0;
+	u32 krec_size = sizeof(struct bpf_func_info);
+	struct bpf_func_info krecord = {};
+	const struct btf_type *type;
+	void __user *urecord;
+	struct btf *btf;
+	int ret = 0;
+
+	nfuncs = attr->func_info_cnt;
+	if (!nfuncs)
+		return 0;
+
+	if (nfuncs != env->subprog_cnt) {
+		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
+		return -EINVAL;
+	}
+
+	urec_size = attr->func_info_rec_size;
+	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
+	    urec_size > MAX_FUNCINFO_REC_SIZE ||
+	    urec_size % sizeof(u32)) {
+		verbose(env, "invalid func info rec size %u\n", urec_size);
+		return -EINVAL;
+	}
+
+	btf = btf_get_by_fd(attr->prog_btf_fd);
+	if (IS_ERR(btf)) {
+		verbose(env, "unable to get btf from fd\n");
+		return PTR_ERR(btf);
+	}
+
+	urecord = u64_to_user_ptr(attr->func_info);
+	/* copy only the part of each record that both sides define */
+	min_size = min_t(u32, krec_size, urec_size);
+
+	for (i = 0; i < nfuncs; i++) {
+		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
+		if (ret) {
+			if (ret == -E2BIG) {
+				verbose(env, "nonzero tailing record in func info\n");
+				/* set the size kernel expects so loader can zero
+				 * out the rest of the record.
+				 */
+				if (put_user(min_size, &uattr->func_info_rec_size))
+					ret = -EFAULT;
+			}
+			goto free_btf;
+		}
+
+		if (copy_from_user(&krecord, urecord, min_size)) {
+			ret = -EFAULT;
+			goto free_btf;
+		}
+
+		/* check insn_offset */
+		if (i == 0) {
+			if (krecord.insn_offset) {
+				verbose(env,
+					"nonzero insn_offset %u for the first func info record\n",
+					krecord.insn_offset);
+				ret = -EINVAL;
+				goto free_btf;
+			}
+		} else if (krecord.insn_offset <= prev_offset) {
+			verbose(env,
+				"same or smaller insn offset (%u) than previous func info record (%u)\n",
+				krecord.insn_offset, prev_offset);
+			ret = -EINVAL;
+			goto free_btf;
+		}
+
+		if (env->subprog_info[i].start != krecord.insn_offset) {
+			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
+			ret = -EINVAL;
+			goto free_btf;
+		}
+
+		/* check type_id */
+		type = btf_type_by_id(btf, krecord.type_id);
+		if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
+			verbose(env, "invalid type id %u in func info\n",
+				krecord.type_id);
+			ret = -EINVAL;
+			goto free_btf;
+		}
+
+		if (i == 0)
+			prog->aux->type_id = krecord.type_id;
+		env->subprog_info[i].type_id = krecord.type_id;
+
+		prev_offset = krecord.insn_offset;
+		urecord += urec_size;
+	}
+
+	prog->aux->btf = btf;
+	return 0;
+
+free_btf:
+	btf_put(btf);
+	return ret;
+}
+
 /* check %cur's range satisfies %old's */
 static bool range_within(struct bpf_reg_state *old,
 			 struct bpf_reg_state *cur)
@@ -5939,6 +6049,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->name[0] = 'F';
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
 		func[i]->jit_requested = 1;
+		/* the btf will be freed only at prog->aux */
+		func[i]->aux->btf = prog->aux->btf;
+		func[i]->aux->type_id = env->subprog_info[i].type_id;
 		func[i] = bpf_int_jit_compile(func[i]);
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
@@ -6325,7 +6438,8 @@ static void free_states(struct bpf_verifier_env *env)
 	kfree(env->explored_states);
 }
 
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
+	      union bpf_attr __user *uattr)
 {
 	struct bpf_verifier_env *env;
 	struct bpf_verifier_log *log;
@@ -6397,6 +6511,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
+	ret = check_btf_func(env->prog, env, attr, uattr);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = do_check(env);
 	if (env->cur_state) {
 		free_verifier_state(env->cur_state, true);

From cc19435cb2ee34a3663f0be69f3a4647795b0417 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:13 -0800
Subject: [PATCH 47/71] tools/bpf: sync kernel uapi bpf.h header to tools
 directory

The kernel uapi bpf.h is synced to tools directory.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 05d95290b8486..c1554aa074659 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -338,6 +338,10 @@ union bpf_attr {
 		 * (context accesses, allowed helpers, etc).
 		 */
 		__u32		expected_attach_type;
+		__u32		prog_btf_fd;	/* fd pointing to BTF type data */
+		__u32		func_info_rec_size;	/* userspace bpf_func_info size */
+		__aligned_u64	func_info;	/* func info */
+		__u32		func_info_cnt;	/* number of bpf_func_info records */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2638,6 +2642,10 @@ struct bpf_prog_info {
 	__u32 nr_jited_func_lens;
 	__aligned_u64 jited_ksyms;
 	__aligned_u64 jited_func_lens;
+	__u32 btf_id;
+	__u32 func_info_rec_size;
+	__aligned_u64 func_info;
+	__u32 func_info_cnt;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2949,4 +2957,9 @@ struct bpf_flow_keys {
 	};
 };
 
+struct bpf_func_info {
+	__u32	insn_offset;
+	__u32	type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */

From 7e0d0fb5522a388700ceff723af98c47ffa8a0a9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:14 -0800
Subject: [PATCH 48/71] tools/bpf: add new fields for program load in lib/bpf

New fields are added for program load in lib/bpf so that an
application using the bpf_load_program_xattr() API is able
to load a program with btf and func_info data.

This functionality will be used in next patch
by bpf selftest test_btf.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c | 4 ++++
 tools/lib/bpf/bpf.h | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 961e1b9fc5927..9b5cf22c4e64b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -205,6 +205,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	attr.log_level = 0;
 	attr.kern_version = load_attr->kern_version;
 	attr.prog_ifindex = load_attr->prog_ifindex;
+	attr.prog_btf_fd = load_attr->prog_btf_fd;
+	attr.func_info_rec_size = load_attr->func_info_rec_size;
+	attr.func_info_cnt = load_attr->func_info_cnt;
+	attr.func_info = ptr_to_u64(load_attr->func_info);
 	memcpy(attr.prog_name, load_attr->name,
 	       min(name_len, BPF_OBJ_NAME_LEN - 1));
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 26a51538213cd..8bdfd806253ae 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -74,6 +74,10 @@ struct bpf_load_program_attr {
 	const char *license;
 	__u32 kern_version;
 	__u32 prog_ifindex;
+	__u32 prog_btf_fd;
+	__u32 func_info_rec_size;
+	const void *func_info;
+	__u32 func_info_cnt;
 };
 
 /* Flags to direct loading requirements */

From 4798c4ba3ba94e4da37b2557dfda04f80a94e8d5 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:15 -0800
Subject: [PATCH 49/71] tools/bpf: extends test_btf to test load/retrieve
 func_type info

A two function bpf program is loaded with btf and func_info.
After successful prog load, the bpf_get_info syscall is called
to retrieve prog info to ensure the types returned from the
kernel match the types passed to the kernel from
user space.

Several negative tests are also added to test loading/retrieving
of func_type info.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 332 ++++++++++++++++++++++++-
 1 file changed, 329 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index e0eeee5c8c049..8fd3a16fea4d8 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/filter.h>
 #include <bpf/bpf.h>
 #include <sys/resource.h>
 #include <libelf.h>
@@ -22,9 +23,13 @@
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 
+#define MAX_INSNS	512
+#define MAX_SUBPROGS	16
+
 static uint32_t pass_cnt;
 static uint32_t error_cnt;
 static uint32_t skip_cnt;
+static bool jit_enabled;
 
 #define CHECK(condition, format...) ({					\
 	int __ret = !!(condition);					\
@@ -60,6 +65,24 @@ static int __base_pr(const char *format, ...)
 	return err;
 }
 
+/* Report whether the bpf_jit_enable sysctl starts with anything but '0'. */
+static bool is_jit_enabled(void)
+{
+	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+	bool enabled = false;
+	int sysctl_fd;
+	char tmpc;
+
+	/* open(2) is (path, flags[, mode]); mode only matters with O_CREAT */
+	sysctl_fd = open(jit_sysctl, O_RDONLY);
+	if (sysctl_fd == -1)
+		return false;	/* unreadable sysctl is treated as JIT off */
+	if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+		enabled = (tmpc != '0');
+	close(sysctl_fd);
+	return enabled;
+}
+
 #define BTF_INFO_ENC(kind, root, vlen)			\
 	((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
@@ -115,6 +138,7 @@ static struct args {
 	bool get_info_test;
 	bool pprint_test;
 	bool always_log;
+	bool func_type_test;
 } args;
 
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
@@ -2947,16 +2971,310 @@ static int test_pprint(void)
 	return err;
 }
 
+static struct btf_func_type_test {
+	const char *descr;
+	const char *str_sec;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	struct bpf_insn insns[MAX_INSNS];
+	__u32 prog_type;
+	__u32 func_info[MAX_SUBPROGS][2];
+	__u32 func_info_rec_size;
+	__u32 func_info_cnt;
+	bool expected_prog_load_failure;
+} func_type_test[] = {
+{
+	.descr = "func_type (main func + one sub)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),	/* [2] */
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [4] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [5] */
+		BTF_FUNC_ENC(NAME_TBD, 4),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB",
+	.str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"),
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info = { {0, 5}, {3, 6} },
+	.func_info_rec_size = 8,
+	.func_info_cnt = 2,
+},
+
+{
+	.descr = "func_type (Incorrect func_info_rec_size)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),	/* [2] */
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [4] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [5] */
+		BTF_FUNC_ENC(NAME_TBD, 4),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB",
+	.str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"),
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info = { {0, 5}, {3, 6} },
+	.func_info_rec_size = 4,
+	.func_info_cnt = 2,
+	.expected_prog_load_failure = true,
+},
+
+{
+	.descr = "func_type (Incorrect func_info_cnt)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),	/* [2] */
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [4] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [5] */
+		BTF_FUNC_ENC(NAME_TBD, 4),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB",
+	.str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"),
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info = { {0, 5}, {3, 6} },
+	.func_info_rec_size = 8,
+	.func_info_cnt = 1,
+	.expected_prog_load_failure = true,
+},
+
+{
+	.descr = "func_type (Incorrect bpf_func_info.insn_offset)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),	/* [2] */
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [3] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+		BTF_FUNC_PROTO_ENC(1, 2),			/* [4] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 3),			/* [5] */
+		BTF_FUNC_ENC(NAME_TBD, 4),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB",
+	.str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"),
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.func_info = { {0, 5}, {2, 6} },
+	.func_info_rec_size = 8,
+	.func_info_cnt = 2,
+	.expected_prog_load_failure = true,
+},
+
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+	size_t last = MAX_INSNS - 1;
+
+	/* scan back from the last slot past entries with zero code and imm */
+	while (last > 0 && fp[last].code == 0 && fp[last].imm == 0)
+		last--;
+	return last + 1;
+}
+
+/* Load one func_type_test[] entry and verify the func_info read back. */
+static int do_test_func_type(int test_num)
+{
+	const struct btf_func_type_test *test = &func_type_test[test_num];
+	unsigned int raw_btf_size, info_len, rec_size;
+	int i, btf_fd = -1, prog_fd = -1, err = 0;
+	struct bpf_load_program_attr attr = {};
+	void *raw_btf, *func_info = NULL;
+	struct bpf_prog_info info = {};
+	struct bpf_func_info *finfo;
+
+	fprintf(stderr, "%s......", test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+				 test->str_sec, test->str_sec_size,
+				 &raw_btf_size);
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	if (*btf_log_buf && args.always_log)
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	attr.prog_type = test->prog_type;
+	attr.insns = test->insns;
+	attr.insns_cnt = probe_prog_length(attr.insns);
+	attr.license = "GPL";
+	attr.prog_btf_fd = btf_fd;
+	attr.func_info_rec_size = test->func_info_rec_size;
+	attr.func_info_cnt = test->func_info_cnt;
+	attr.func_info = test->func_info;
+
+	*btf_log_buf = '\0';
+	prog_fd = bpf_load_program_xattr(&attr, btf_log_buf,
+					 BTF_LOG_BUF_SIZE);
+	/* a negative test passes only when the load is rejected */
+	if (test->expected_prog_load_failure) {
+		if (CHECK(prog_fd != -1, "unexpected prog load success"))
+			err = -1;
+		goto done;
+	}
+	if (CHECK(prog_fd == -1, "invalid prog_fd errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		err = -1;
+		goto done;
+	}
+	if (!jit_enabled) {
+		skip_cnt++;
+		fprintf(stderr, "SKIPPED, please enable sysctl bpf_jit_enable\n");
+		goto done;
+	}
+
+	/* get necessary lens */
+	info_len = sizeof(struct bpf_prog_info);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		goto done;
+	}
+	if (CHECK(info.func_info_cnt != 2,
+		  "incorrect info.func_info_cnt (1st) %d\n",
+		  info.func_info_cnt)) {
+		err = -1;
+		goto done;
+	}
+	rec_size = info.func_info_rec_size;
+	/* type_id is read from every record below, so it must fit in one */
+	if (CHECK(rec_size < sizeof(*finfo),
+		  "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) {
+		err = -1;
+		goto done;
+	}
+
+	func_info = malloc(info.func_info_cnt * rec_size);
+	if (CHECK(!func_info, "out of memory")) {
+		err = -1;
+		goto done;
+	}
+
+	/* reset info to only retrieve func_info related data */
+	memset(&info, 0, sizeof(info));
+	info.func_info_cnt = 2;
+	info.func_info_rec_size = rec_size;
+	info.func_info = ptr_to_u64(func_info);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		goto done;
+	}
+	if (CHECK(info.func_info_cnt != 2,
+		  "incorrect info.func_info_cnt (2nd) %d\n",
+		  info.func_info_cnt)) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.func_info_rec_size != rec_size,
+		  "incorrect info.func_info_rec_size (2nd) %d\n",
+		  info.func_info_rec_size)) {
+		err = -1;
+		goto done;
+	}
+
+	finfo = func_info;
+	for (i = 0; i < 2; i++) {
+		if (CHECK(finfo->type_id != test->func_info[i][1],
+			  "incorrect func_type %u expected %u",
+			  finfo->type_id, test->func_info[i][1])) {
+			err = -1;
+			goto done;
+		}
+		finfo = (void *)finfo + rec_size;
+	}
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (prog_fd != -1)
+		close(prog_fd);
+	free(func_info);
+	return err;
+}
+
+static int test_func_type(void)
+{
+	unsigned int idx = 0;
+	int failed = 0;
+
+	/* run every table entry; OR the results so one failure marks the run */
+	while (idx < ARRAY_SIZE(func_type_test))
+		failed |= count_result(do_test_func_type(idx++));
+	return failed;
+}
+
 static void usage(const char *cmd)
 {
-	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] |"
+			" [-g test_num (1 - %zu)] |"
+			" [-f test_num (1 - %zu)] | [-p] | [-k] ]\n",
 		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
 		ARRAY_SIZE(file_tests));
 }
 
 static int parse_args(int argc, char **argv)
 {
-	const char *optstr = "lpf:r:g:";
+	const char *optstr = "lpkf:r:g:";
 	int opt;
 
 	while ((opt = getopt(argc, argv, optstr)) != -1) {
@@ -2979,6 +3297,9 @@ static int parse_args(int argc, char **argv)
 		case 'p':
 			args.pprint_test = true;
 			break;
+		case 'k':
+			args.func_type_test = true;
+			break;
 		case 'h':
 			usage(argv[0]);
 			exit(0);
@@ -3032,6 +3353,8 @@ int main(int argc, char **argv)
 	if (args.always_log)
 		libbpf_set_print(__base_pr, __base_pr, __base_pr);
 
+	jit_enabled = is_jit_enabled();
+
 	if (args.raw_test)
 		err |= test_raw();
 
@@ -3044,8 +3367,11 @@ int main(int argc, char **argv)
 	if (args.pprint_test)
 		err |= test_pprint();
 
+	if (args.func_type_test)
+		err |= test_func_type();
+
 	if (args.raw_test || args.get_info_test || args.file_test ||
-	    args.pprint_test)
+	    args.pprint_test || args.func_type_test)
 		goto done;
 
 	err |= test_raw();

From 2993e0515bb44e157c17c9ba7309ba46366b6add Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:16 -0800
Subject: [PATCH 50/71] tools/bpf: add support to read .BTF.ext sections

The .BTF section is already available to encode types.
These types can be used for map pretty print. The whole
.BTF section is also passed to the kernel, which verifies
it and can return it to user space for pretty print etc.

The llvm patch at https://reviews.llvm.org/D53736
will generate .BTF section and one more section .BTF.ext.
The .BTF.ext section encodes function type
information and line information. Note that
this patch set only supports function type info.
The functionality is implemented in libbpf.

The .BTF section can be directly loaded into the
kernel, and the .BTF.ext section cannot. The loader
may need to do some relocation and merging,
similar to merging multiple code sections, before
loading into the kernel.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c    |  46 ++++++-
 tools/lib/bpf/btf.c    | 274 +++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h    |  50 ++++++++
 tools/lib/bpf/libbpf.c |  87 ++++++++++---
 4 files changed, 442 insertions(+), 15 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9b5cf22c4e64b..836447bb4f143 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -186,6 +186,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 			   char *log_buf, size_t log_buf_sz)
 {
 	union bpf_attr attr;
+	void *finfo = NULL;
 	__u32 name_len;
 	int fd;
 
@@ -216,12 +217,55 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	if (fd >= 0 || !log_buf || !log_buf_sz)
 		return fd;
 
+	/* After bpf_prog_load, the kernel may modify certain attributes
+	 * to give user space a hint how to deal with loading failure.
+	 * Check to see whether we can make some changes and load again.
+	 */
+	if (errno == E2BIG && attr.func_info_cnt &&
+	    attr.func_info_rec_size < load_attr->func_info_rec_size) {
+		__u32 actual_rec_size = load_attr->func_info_rec_size;
+		__u32 expected_rec_size = attr.func_info_rec_size;
+		__u32 finfo_cnt = load_attr->func_info_cnt;
+		const void *orecord;
+		void *nrecord;
+		__u32 i;
+
+		/* calloc() fails if cnt * rec_size overflows, where a 32-bit
+		 * multiply would wrap, and it returns zeroed memory
+		 */
+		finfo = calloc(finfo_cnt, actual_rec_size);
+		if (!finfo)
+			/* further try with log buffer won't help */
+			return fd;
+
+		/* copy the bytes the kernel understands; tails stay zero */
+		orecord = load_attr->func_info;
+		nrecord = finfo;
+		for (i = 0; i < finfo_cnt; i++) {
+			memcpy(nrecord, orecord, expected_rec_size);
+			orecord += actual_rec_size;
+			nrecord += actual_rec_size;
+		}
+
+		/* try with corrected func info records */
+		attr.func_info = ptr_to_u64(finfo);
+		attr.func_info_rec_size = load_attr->func_info_rec_size;
+
+		fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+
+		if (fd >= 0 || !log_buf || !log_buf_sz)
+			goto done;
+	}
+
 	/* Try again with log */
 	attr.log_buf = ptr_to_u64(log_buf);
 	attr.log_size = log_buf_sz;
 	attr.log_level = 1;
 	log_buf[0] = 0;
-	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+done:
+	free(finfo);
+	return fd;
 }
 
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 31225e64766fd..fe87cb48a6a99 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -37,6 +37,18 @@ struct btf {
 	int fd;
 };
 
+struct btf_ext {
+	void *func_info;		/* copy of the data after the record-size word */
+	__u32 func_info_rec_size;	/* leading __u32 of the func_info data */
+	__u32 func_info_len;		/* bytes held in func_info */
+};
+
+/* The minimum bpf_func_info checked by the loader */
+struct bpf_func_info_min {
+	__u32   insn_offset;	/* bytes in .BTF.ext; rewritten to an insn index */
+	__u32   type_id;
+};
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
 	if (btf->types_size - btf->nr_types < 2) {
@@ -397,3 +409,265 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 	else
 		return NULL;
 }
+
+/* Sanity-check the func_info subsection: record size, then each section. */
+static int btf_ext_validate_func_info(const void *finfo, __u32 size,
+				      btf_print_fn_t err_log)
+{
+	int sec_hdrlen = sizeof(struct btf_sec_func_info);
+	__u32 size_left, num_records, record_size;
+	const struct btf_sec_func_info *sinfo;
+	__u64 total_record_size;
+
+	/* The subsection starts with a __u32 holding the record size */
+	if (size < sizeof(__u32)) {
+		elog("BTF.ext func_info record size not found");
+		return -EINVAL;
+	}
+
+	/* Records must hold bpf_func_info_min and be a multiple of 4 bytes */
+	record_size = *(__u32 *)finfo;
+	if (record_size < sizeof(struct bpf_func_info_min) ||
+	    record_size % sizeof(__u32)) {
+		elog("BTF.ext func_info invalid record size");
+		return -EINVAL;
+	}
+
+	sinfo = finfo + sizeof(__u32);
+	size_left = size - sizeof(__u32);
+
+	/* No func_info records: fail now so .BTF.ext won't be used */
+	if (!size_left) {
+		elog("BTF.ext no func info records");
+		return -EINVAL;
+	}
+
+	/* Walk the sections: a header followed by num_func_info records */
+	while (size_left) {
+		if (size_left < sec_hdrlen) {
+			elog("BTF.ext func_info header not found");
+			return -EINVAL;
+		}
+
+		num_records = sinfo->num_func_info;
+		if (num_records == 0) {
+			elog("incorrect BTF.ext num_func_info");
+			return -EINVAL;
+		}
+
+		total_record_size = sec_hdrlen +
+				    (__u64)num_records * record_size;
+		if (size_left < total_record_size) {
+			elog("incorrect BTF.ext num_func_info");
+			return -EINVAL;
+		}
+
+		size_left -= total_record_size;
+		sinfo = (void *)sinfo + total_record_size;
+	}
+
+	return 0;
+}
+
+/* Check the fixed .BTF.ext header, then validate the func_info it locates. */
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
+			     btf_print_fn_t err_log)
+{
+	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+	__u32 meta_left;
+	void *finfo;
+
+	/* all fixed header fields are read below, so all must be present */
+	if (data_size < sizeof(*hdr) ||
+	    data_size < hdr->hdr_len) {
+		elog("BTF.ext header not found");
+		return -EINVAL;
+	}
+
+	if (hdr->magic != BTF_MAGIC) {
+		elog("Invalid BTF.ext magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		elog("Unsupported BTF.ext version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags) {
+		elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	meta_left = data_size - hdr->hdr_len;
+	if (!meta_left) {
+		elog("BTF.ext has no data\n");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->func_info_off) {
+		elog("Invalid BTF.ext func_info section offset:%u\n",
+		     hdr->func_info_off);
+		return -EINVAL;
+	}
+
+	if (hdr->func_info_off & 0x03) {
+		elog("BTF.ext func_info section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	/* compare lengths, not end positions: the u32 sum could wrap */
+	if (hdr->func_info_len > meta_left - hdr->func_info_off) {
+		elog("Invalid BTF.ext func_info section size:%u\n",
+		     hdr->func_info_len);
+		return -EINVAL;
+	}
+
+	finfo = data + hdr->hdr_len + hdr->func_info_off;
+	return btf_ext_validate_func_info(finfo, hdr->func_info_len, err_log);
+}
+
+/* Release a btf_ext and its func_info copy; a NULL btf_ext is ignored. */
+void btf_ext__free(struct btf_ext *btf_ext)
+{
+	if (btf_ext) {
+		free(btf_ext->func_info);
+		free(btf_ext);
+	}
+}
+
+/* Validate a .BTF.ext section and keep a private copy of its func_info. */
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
+{
+	const struct btf_ext_header *hdr = (const struct btf_ext_header *)data;
+	__u32 word = sizeof(__u32), payload_len;
+	struct btf_ext *ext;
+	void *sec, *copy;
+	int err;
+
+	err = btf_ext_parse_hdr(data, size, err_log);
+	if (err)
+		return ERR_PTR(err);
+
+	ext = calloc(1, sizeof(*ext));
+	if (!ext)
+		return ERR_PTR(-ENOMEM);
+
+	/* the subsection is a __u32 record size followed by section data */
+	payload_len = hdr->func_info_len - word;
+	copy = malloc(payload_len);
+	if (!copy) {
+		free(ext);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* remember the record size, copy everything that follows it */
+	sec = data + hdr->hdr_len + hdr->func_info_off;
+	memcpy(copy, sec + word, payload_len);
+	ext->func_info_rec_size = *(__u32 *)sec;
+	ext->func_info_len = payload_len;
+	ext->func_info = copy;
+
+	return ext;
+}
+
+/* Build func_info for a program from the .BTF.ext records of sec_name.
+ * On success *func_info is a malloc'ed copy handed to the caller.
+ */
+int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext,
+			const char *sec_name, void **func_info,
+			__u32 *func_info_rec_size, __u32 *func_info_len)
+{
+	__u32 sec_hdrlen = sizeof(struct btf_sec_func_info);
+	__u32 i, record_size, records_len;
+	struct btf_sec_func_info *sinfo;
+	const char *info_sec_name;
+	__s64 remain_len;
+	void *data;
+
+	record_size = btf_ext->func_info_rec_size;
+	sinfo = btf_ext->func_info;
+	remain_len = btf_ext->func_info_len;
+
+	while (remain_len > 0) {
+		records_len = sinfo->num_func_info * record_size;
+		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
+		/* a NULL name (bad string offset) can never match sec_name */
+		if (!info_sec_name || strcmp(info_sec_name, sec_name)) {
+			remain_len -= sec_hdrlen + records_len;
+			sinfo = (void *)sinfo + sec_hdrlen + records_len;
+			continue;
+		}
+
+		data = malloc(records_len);
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data, sinfo->data, records_len);
+
+		/* .BTF.ext stores insn_offset in bytes; the kernel expects
+		 * it in units of bpf_insn. The rest is passed as is.
+		 */
+		for (i = 0; i < sinfo->num_func_info; i++) {
+			struct bpf_func_info_min *record;
+
+			record = data + i * record_size;
+			record->insn_offset /= sizeof(struct bpf_insn);
+		}
+
+		*func_info = data;
+		*func_info_len = records_len;
+		*func_info_rec_size = record_size;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Append the func_info records of sec_name to *func_info, shifting each
+ * insn_offset by insns_cnt, the insn count already in the program.
+ */
+int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext,
+		   const char *sec_name, __u32 insns_cnt,
+		   void **func_info, __u32 *func_info_len)
+{
+	__u32 sec_hdrlen = sizeof(struct btf_sec_func_info);
+	__u32 i, record_size, existing_flen, records_len;
+	struct btf_sec_func_info *sinfo;
+	const char *info_sec_name;
+	__s64 remain_len;
+	void *data;
+
+	record_size = btf_ext->func_info_rec_size;
+	sinfo = btf_ext->func_info;
+	remain_len = btf_ext->func_info_len;
+	while (remain_len > 0) {
+		records_len = sinfo->num_func_info * record_size;
+		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
+		if (!info_sec_name || strcmp(info_sec_name, sec_name)) {
+			remain_len -= sec_hdrlen + records_len;
+			sinfo = (void *)sinfo + sec_hdrlen + records_len;
+			continue;
+		}
+
+		existing_flen = *func_info_len;
+		data = realloc(*func_info, existing_flen + records_len);
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data + existing_flen, sinfo->data, records_len);
+		/* convert byte offsets to insn indexes; the rest goes as is */
+		for (i = 0; i < sinfo->num_func_info; i++) {
+			struct bpf_func_info_min *record;
+
+			record = data + existing_flen + i * record_size;
+			record->insn_offset /= sizeof(struct bpf_insn);
+			record->insn_offset += insns_cnt;
+		}
+		*func_info = data;
+		*func_info_len = existing_flen + records_len;
+		return 0;
+	}
+
+	return -EINVAL;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b77e7080f7e73..578171e8cb265 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -11,10 +11,51 @@
 #endif
 
 #define BTF_ELF_SEC ".BTF"
+#define BTF_EXT_ELF_SEC ".BTF.ext"
 
 struct btf;
+struct btf_ext;
 struct btf_type;
 
+/*
+ * The .BTF.ext ELF section layout defined as
+ *   struct btf_ext_header
+ *   func_info subsection
+ *
+ * The func_info subsection layout:
+ *   record size for struct bpf_func_info in the func_info subsection
+ *   struct btf_sec_func_info for section #1
+ *   a list of bpf_func_info records for section #1
+ *     where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ *     but may not be identical
+ *   struct btf_sec_func_info for section #2
+ *   a list of bpf_func_info records for section #2
+ *   ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+	__u16	magic;		/* must equal BTF_MAGIC */
+	__u8	version;	/* must equal BTF_VERSION */
+	__u8	flags;		/* must be zero; any set bit is rejected */
+	__u32	hdr_len;	/* header length in bytes */
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	func_info_off;
+	__u32	func_info_len;
+};
+
+struct btf_sec_func_info {
+	__u32	sec_name_off;	/* section name: offset for btf__name_by_offset() */
+	__u32	num_func_info;	/* number of records in data[] */
+	/* Followed by num_func_info number of bpf func_info records */
+	__u8	data[0];
+};
+
 typedef int (*btf_print_fn_t)(const char *, ...)
 	__attribute__((format(printf, 1, 2)));
 
@@ -29,4 +70,13 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
+void btf_ext__free(struct btf_ext *btf_ext);
+int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext,
+			const char *sec_name, void **func_info,
+			__u32 *func_info_rec_size, __u32 *func_info_len);
+int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext,
+		   const char *sec_name, __u32 insns_cnt, void **func_info,
+		   __u32 *func_info_len);
+
 #endif /* __LIBBPF_BTF_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a01eb9584e522..cb6565d796034 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -156,6 +156,10 @@ struct bpf_program {
 	bpf_program_clear_priv_t clear_priv;
 
 	enum bpf_attach_type expected_attach_type;
+	int btf_fd;
+	void *func_info;
+	__u32 func_info_rec_size;
+	__u32 func_info_len;
 };
 
 struct bpf_map {
@@ -212,6 +216,7 @@ struct bpf_object {
 	struct list_head list;
 
 	struct btf *btf;
+	struct btf_ext *btf_ext;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -241,6 +246,9 @@ void bpf_program__unload(struct bpf_program *prog)
 
 	prog->instances.nr = -1;
 	zfree(&prog->instances.fds);
+
+	prog->btf_fd = -1;	/* not ours to close: the fd belongs to obj->btf */
+	zfree(&prog->func_info);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -315,6 +323,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
 	prog->type = BPF_PROG_TYPE_KPROBE;
+	prog->btf_fd = -1;
 
 	return 0;
 errout:
@@ -807,6 +816,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 					   BTF_ELF_SEC, PTR_ERR(obj->btf));
 				obj->btf = NULL;
 			}
+		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			obj->btf_ext = btf_ext__new(data->d_buf, data->d_size,
+						    __pr_debug);
+			if (IS_ERR(obj->btf_ext)) {
+				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+					   BTF_EXT_ELF_SEC,
+					   PTR_ERR(obj->btf_ext));
+				obj->btf_ext = NULL;
+			}
 		} else if (sh.sh_type == SHT_SYMTAB) {
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -1190,6 +1208,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
 	struct bpf_insn *insn, *new_insn;
 	struct bpf_program *text;
 	size_t new_cnt;
+	int err;
 
 	if (relo->type != RELO_CALL)
 		return -LIBBPF_ERRNO__RELOC;
@@ -1212,6 +1231,20 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
 			pr_warning("oom in prog realloc\n");
 			return -ENOMEM;
 		}
+
+		if (obj->btf && obj->btf_ext) {
+			err = btf_ext__reloc(obj->btf, obj->btf_ext,
+					     text->section_name,
+					     prog->insns_cnt,
+					     &prog->func_info,
+					     &prog->func_info_len);
+			if (err) {
+				pr_warning("error in btf_ext__reloc for sec %s\n",
+					   text->section_name);
+				return err;
+			}
+		}
+
 		memcpy(new_insn + prog->insns_cnt, text->insns,
 		       text->insns_cnt * sizeof(*insn));
 		prog->insns = new_insn;
@@ -1231,7 +1264,24 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 {
 	int i, err;
 
-	if (!prog || !prog->reloc_desc)
+	if (!prog)
+		return 0;
+
+	if (obj->btf && obj->btf_ext) {
+		err = btf_ext__reloc_init(obj->btf, obj->btf_ext,
+					  prog->section_name,
+					  &prog->func_info,
+					  &prog->func_info_rec_size,
+					  &prog->func_info_len);
+		if (err) {
+			pr_warning("err in btf_ext__reloc_init for sec %s\n",
+				   prog->section_name);
+			return err;
+		}
+		prog->btf_fd = btf__fd(obj->btf);
+	}
+
+	if (!prog->reloc_desc)
 		return 0;
 
 	for (i = 0; i < prog->nr_reloc; i++) {
@@ -1319,9 +1369,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 }
 
 static int
-load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
-	     const char *name, struct bpf_insn *insns, int insns_cnt,
-	     char *license, __u32 kern_version, int *pfd, int prog_ifindex)
+load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
+	     char *license, __u32 kern_version, int *pfd,
+	     __u32 func_info_cnt)
 {
 	struct bpf_load_program_attr load_attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -1329,14 +1379,18 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	int ret;
 
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
-	load_attr.prog_type = type;
-	load_attr.expected_attach_type = expected_attach_type;
-	load_attr.name = name;
+	load_attr.prog_type = prog->type;
+	load_attr.expected_attach_type = prog->expected_attach_type;
+	load_attr.name = prog->name;
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;
-	load_attr.prog_ifindex = prog_ifindex;
+	load_attr.prog_ifindex = prog->prog_ifindex;
+	load_attr.prog_btf_fd = prog->btf_fd;
+	load_attr.func_info = prog->func_info;
+	load_attr.func_info_rec_size = prog->func_info_rec_size;
+	load_attr.func_info_cnt = func_info_cnt;
 
 	if (!load_attr.insns || !load_attr.insns_cnt)
 		return -EINVAL;
@@ -1394,8 +1448,14 @@ int
 bpf_program__load(struct bpf_program *prog,
 		  char *license, __u32 kern_version)
 {
+	__u32 func_info_cnt;
 	int err = 0, fd, i;
 
+	if (prog->func_info_len == 0)
+		func_info_cnt = 0;
+	else
+		func_info_cnt = prog->func_info_len / prog->func_info_rec_size;
+
 	if (prog->instances.nr < 0 || !prog->instances.fds) {
 		if (prog->preprocessor) {
 			pr_warning("Internal error: can't load program '%s'\n",
@@ -1417,10 +1477,9 @@ bpf_program__load(struct bpf_program *prog,
 			pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
 				   prog->section_name, prog->instances.nr);
 		}
-		err = load_program(prog->type, prog->expected_attach_type,
-				   prog->name, prog->insns, prog->insns_cnt,
+		err = load_program(prog, prog->insns, prog->insns_cnt,
 				   license, kern_version, &fd,
-				   prog->prog_ifindex);
+				   func_info_cnt);
 		if (!err)
 			prog->instances.fds[0] = fd;
 		goto out;
@@ -1448,11 +1507,10 @@ bpf_program__load(struct bpf_program *prog,
 			continue;
 		}
 
-		err = load_program(prog->type, prog->expected_attach_type,
-				   prog->name, result.new_insn_ptr,
+		err = load_program(prog, result.new_insn_ptr,
 				   result.new_insn_cnt,
 				   license, kern_version, &fd,
-				   prog->prog_ifindex);
+				   func_info_cnt);
 
 		if (err) {
 			pr_warning("Loading the %dth instance of program '%s' failed\n",
@@ -2120,6 +2178,7 @@ void bpf_object__close(struct bpf_object *obj)
 	bpf_object__elf_finish(obj);
 	bpf_object__unload(obj);
 	btf__free(obj->btf);
+	btf_ext__free(obj->btf_ext);
 
 	for (i = 0; i < obj->nr_maps; i++) {
 		zfree(&obj->maps[i].name);

From 9ce6ae22c8e878aee7a96836b2ed9fd9a8173e41 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:17 -0800
Subject: [PATCH 51/71] tools/bpf: do not use pahole if clang/llvm can generate
 BTF sections

Add additional checks in tools/testing/selftests/bpf and
samples/bpf so that pahole is not used when the clang/llvm
compiler can generate BTF sections.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/bpf/Makefile                 | 8 ++++++++
 tools/testing/selftests/bpf/Makefile | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc2..35444f4a846bd 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,12 +208,20 @@ endif
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+			  $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+			  readelf -S ./llvm_btf_verify.o | grep BTF; \
+			  /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+	EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
 	EXTRA_CFLAGS += -g
 	LLC_FLAGS += -mattr=dwarfris
 	DWARF2BTF = y
 endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 57b4712a62762..1dde03ea14848 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -126,7 +126,14 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+			  $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+			  readelf -S ./llvm_btf_verify.o | grep BTF; \
+			  /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+	CLANG_FLAGS += -g
+else
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
 ifneq ($(BTF_OBJCOPY_PROBE),)
@@ -136,6 +143,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
 endif
 endif
 endif
+endif
 
 # Have one program compiled without "-target bpf" to test whether libbpf loads
 # it successfully

From d7f5b5e051554c91bab995b67101af4625af591a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:18 -0800
Subject: [PATCH 52/71] tools/bpf: refactor to implement btf_get_from_id() in
 lib/bpf

The function get_btf() is implemented in tools/bpf/bpftool/map.c
to get a btf structure given a map_info. This patch
refactors it into the function btf_get_from_id()
in tools/lib/bpf so that it can be used later.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c | 68 ++--------------------------------------
 tools/lib/bpf/btf.c     | 69 +++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h     |  1 +
 3 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index dc9a8967ab8ca..a1ae2a3e9fefc 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -215,70 +215,6 @@ static int do_dump_btf(const struct btf_dumper *d,
 	return ret;
 }
 
-static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
-{
-	struct bpf_btf_info btf_info = { 0 };
-	__u32 len = sizeof(btf_info);
-	__u32 last_size;
-	int btf_fd;
-	void *ptr;
-	int err;
-
-	err = 0;
-	*btf = NULL;
-	btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
-	if (btf_fd < 0)
-		return 0;
-
-	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-	 * let's start with a sane default - 4KiB here - and resize it only if
-	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
-	 */
-	btf_info.btf_size = 4096;
-	last_size = btf_info.btf_size;
-	ptr = malloc(last_size);
-	if (!ptr) {
-		err = -ENOMEM;
-		goto exit_free;
-	}
-
-	bzero(ptr, last_size);
-	btf_info.btf = ptr_to_u64(ptr);
-	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-
-	if (!err && btf_info.btf_size > last_size) {
-		void *temp_ptr;
-
-		last_size = btf_info.btf_size;
-		temp_ptr = realloc(ptr, last_size);
-		if (!temp_ptr) {
-			err = -ENOMEM;
-			goto exit_free;
-		}
-		ptr = temp_ptr;
-		bzero(ptr, last_size);
-		btf_info.btf = ptr_to_u64(ptr);
-		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-	}
-
-	if (err || btf_info.btf_size > last_size) {
-		err = errno;
-		goto exit_free;
-	}
-
-	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-	}
-
-exit_free:
-	close(btf_fd);
-	free(ptr);
-
-	return err;
-}
-
 static json_writer_t *get_btf_writer(void)
 {
 	json_writer_t *jw = jsonw_new(stdout);
@@ -775,7 +711,7 @@ static int do_dump(int argc, char **argv)
 
 	prev_key = NULL;
 
-	err = get_btf(&info, &btf);
+	err = btf_get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
@@ -919,7 +855,7 @@ static int do_lookup(int argc, char **argv)
 	}
 
 	/* here means bpf_map_lookup_elem() succeeded */
-	err = get_btf(&info, &btf);
+	err = btf_get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index fe87cb48a6a99..13ddc4bd24ee3 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -49,6 +49,11 @@ struct bpf_func_info_min {
 	__u32   type_id;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
 	if (btf->types_size - btf->nr_types < 2) {
@@ -410,6 +415,70 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 		return NULL;
 }
 
+int btf_get_from_id(__u32 id, struct btf **btf)
+{
+	struct bpf_btf_info btf_info = { 0 };
+	__u32 len = sizeof(btf_info);
+	__u32 last_size;
+	int btf_fd;
+	void *ptr;
+	int err;
+
+	err = 0;
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (btf_fd < 0)
+		return 0;
+
+	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+	 * let's start with a sane default - 4KiB here - and resize it only if
+	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
+	 */
+	btf_info.btf_size = 4096;
+	last_size = btf_info.btf_size;
+	ptr = malloc(last_size);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_free;
+	}
+
+	bzero(ptr, last_size);
+	btf_info.btf = ptr_to_u64(ptr);
+	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+	if (!err && btf_info.btf_size > last_size) {
+		void *temp_ptr;
+
+		last_size = btf_info.btf_size;
+		temp_ptr = realloc(ptr, last_size);
+		if (!temp_ptr) {
+			err = -ENOMEM;
+			goto exit_free;
+		}
+		ptr = temp_ptr;
+		bzero(ptr, last_size);
+		btf_info.btf = ptr_to_u64(ptr);
+		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+	}
+
+	if (err || btf_info.btf_size > last_size) {
+		err = errno;
+		goto exit_free;
+	}
+
+	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+	}
+
+exit_free:
+	close(btf_fd);
+	free(ptr);
+
+	return err;
+}
+
 static int btf_ext_validate_func_info(const void *finfo, __u32 size,
 				      btf_print_fn_t err_log)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 578171e8cb265..386b2ffc32a3e 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -69,6 +69,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf);
 
 struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
 void btf_ext__free(struct btf_ext *btf_ext);

From 999d82cbc04416cc7f2b5cb6daab947c16f0fd3a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:20 -0800
Subject: [PATCH 53/71] tools/bpf: enhance test_btf file testing to test func
 info

Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to
have two sections, and enhance test_btf.c test_file feature
to test btf func_info returned by the kernel.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c       | 117 +++++++++++++++++--
 tools/testing/selftests/bpf/test_btf_haskv.c |  16 ++-
 tools/testing/selftests/bpf/test_btf_nokv.c  |  16 ++-
 3 files changed, 136 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 8fd3a16fea4d8..7b1b160d6e67b 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2434,13 +2434,13 @@ static struct btf_file_test file_tests[] = {
 },
 };
 
-static int file_has_btf_elf(const char *fn)
+static int file_has_btf_elf(const char *fn, bool *has_btf_ext)
 {
 	Elf_Scn *scn = NULL;
 	GElf_Ehdr ehdr;
+	int ret = 0;
 	int elf_fd;
 	Elf *elf;
-	int ret;
 
 	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
 		  "elf_version(EV_CURRENT) == EV_NONE"))
@@ -2472,14 +2472,12 @@ static int file_has_btf_elf(const char *fn)
 		}
 
 		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-		if (!strcmp(sh_name, BTF_ELF_SEC)) {
+		if (!strcmp(sh_name, BTF_ELF_SEC))
 			ret = 1;
-			goto done;
-		}
+		if (!strcmp(sh_name, BTF_EXT_ELF_SEC))
+			*has_btf_ext = true;
 	}
 
-	ret = 0;
-
 done:
 	close(elf_fd);
 	elf_end(elf);
@@ -2489,15 +2487,24 @@ static int file_has_btf_elf(const char *fn)
 static int do_test_file(unsigned int test_num)
 {
 	const struct btf_file_test *test = &file_tests[test_num - 1];
+	const char *expected_fnames[] = {"_dummy_tracepoint",
+					 "test_long_fname_1",
+					 "test_long_fname_2"};
+	struct bpf_prog_info info = {};
 	struct bpf_object *obj = NULL;
+	struct bpf_func_info *finfo;
 	struct bpf_program *prog;
+	__u32 info_len, rec_size;
+	bool has_btf_ext = false;
+	struct btf *btf = NULL;
+	void *func_info = NULL;
 	struct bpf_map *map;
-	int err;
+	int i, err, prog_fd;
 
 	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
 		test->file);
 
-	err = file_has_btf_elf(test->file);
+	err = file_has_btf_elf(test->file, &has_btf_ext);
 	if (err == -1)
 		return err;
 
@@ -2525,6 +2532,7 @@ static int do_test_file(unsigned int test_num)
 	err = bpf_object__load(obj);
 	if (CHECK(err < 0, "bpf_object__load: %d", err))
 		goto done;
+	prog_fd = bpf_program__fd(prog);
 
 	map = bpf_object__find_map_by_name(obj, "btf_map");
 	if (CHECK(!map, "btf_map not found")) {
@@ -2539,9 +2547,100 @@ static int do_test_file(unsigned int test_num)
 		  test->btf_kv_notfound))
 		goto done;
 
+	if (!jit_enabled || !has_btf_ext)
+		goto skip_jit;
+
+	/* get necessary program info */
+	info_len = sizeof(struct bpf_prog_info);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+
+	if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.func_info_cnt != 3,
+		  "incorrect info.func_info_cnt (1st) %d",
+		  info.func_info_cnt)) {
+		err = -1;
+		goto done;
+	}
+	rec_size = info.func_info_rec_size;
+	if (CHECK(rec_size < 4,
+		  "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) {
+		err = -1;
+		goto done;
+	}
+
+	func_info = malloc(info.func_info_cnt * rec_size);
+	if (CHECK(!func_info, "out of memeory")) {
+		err = -1;
+		goto done;
+	}
+
+	/* reset info to only retrieve func_info related data */
+	memset(&info, 0, sizeof(info));
+	info.func_info_cnt = 3;
+	info.func_info_rec_size = rec_size;
+	info.func_info = ptr_to_u64(func_info);
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+
+	if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.func_info_cnt != 3,
+		  "incorrect info.func_info_cnt (2nd) %d",
+		  info.func_info_cnt)) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.func_info_rec_size != rec_size,
+		  "incorrect info.func_info_rec_size (2nd) %d",
+		  info.func_info_rec_size)) {
+		err = -1;
+		goto done;
+	}
+
+	err = btf_get_from_id(info.btf_id, &btf);
+	if (CHECK(err, "cannot get btf from kernel, err: %d", err))
+		goto done;
+
+	/* check three functions */
+	finfo = func_info;
+	for (i = 0; i < 3; i++) {
+		const struct btf_type *t;
+		const char *fname;
+
+		t = btf__type_by_id(btf, finfo->type_id);
+		if (CHECK(!t, "btf__type_by_id failure: id %u",
+			  finfo->type_id)) {
+			err = -1;
+			goto done;
+		}
+
+		fname = btf__name_by_offset(btf, t->name_off);
+		err = strcmp(fname, expected_fnames[i]);
+		/* for the second and third functions in .text section,
+		 * the compiler may order them either way.
+		 */
+		if (i && err)
+			err = strcmp(fname, expected_fnames[3 - i]);
+		if (CHECK(err, "incorrect fname %s", fname ? : "")) {
+			err = -1;
+			goto done;
+		}
+
+		finfo = (void *)finfo + rec_size;
+	}
+
+skip_jit:
 	fprintf(stderr, "OK");
 
 done:
+	free(func_info);
 	bpf_object__close(obj);
 	return err;
 }
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
index b21b876f475d8..e5c79fe0ffdb2 100644
--- a/tools/testing/selftests/bpf/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -24,8 +24,8 @@ struct dummy_tracepoint_args {
 	struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
 	struct ipv_counts *counts;
 	int key = 0;
@@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
 	return 0;
 }
 
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
index 0ed8e088eebf9..434188c377743 100644
--- a/tools/testing/selftests/bpf/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -22,8 +22,8 @@ struct dummy_tracepoint_args {
 	struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
 	struct ipv_counts *counts;
 	int key = 0;
@@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
 	return 0;
 }
 
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
 char _license[] SEC("license") = "GPL";

From 254471e57a86b8dc1a2cc19848e99f5d7c0558f4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 19 Nov 2018 15:29:21 -0800
Subject: [PATCH 54/71] tools/bpf: bpftool: add support for func types

This patch adds support for printing the function signature
if btf func_info is available. Note that the ksym
now uses the function name instead of prog_name, as
prog_name has a limit of 16 bytes including the
ending '\0'.

The following is a sample output for selftests
test_btf with file test_btf_haskv.o for translated insns
and jited insns respectively.

  $ bpftool prog dump xlated id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * arg):
     0: (85) call pc+2#bpf_prog_2dcecc18072623fc_test_long_fname_1
     1: (b7) r0 = 0
     2: (95) exit
  int test_long_fname_1(struct dummy_tracepoint_args * arg):
     3: (85) call pc+1#bpf_prog_89d64e4abf0f0126_test_long_fname_2
     4: (95) exit
  int test_long_fname_2(struct dummy_tracepoint_args * arg):
     5: (b7) r2 = 0
     6: (63) *(u32 *)(r10 -4) = r2
     7: (79) r1 = *(u64 *)(r1 +8)
     ...
     22: (07) r1 += 1
     23: (63) *(u32 *)(r0 +4) = r1
     24: (95) exit

  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * arg):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    3c:   add    $0x28,%rbp
    40:   leaveq
    41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * arg):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    3a:   add    $0x28,%rbp
    3e:   leaveq
    3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * arg):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    80:   add    $0x28,%rbp
    84:   leaveq
    85:   retq

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf_dumper.c    | 136 ++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h          |   2 +
 tools/bpf/bpftool/prog.c          |  56 ++++++++++++
 tools/bpf/bpftool/xlated_dumper.c |  33 ++++++++
 tools/bpf/bpftool/xlated_dumper.h |   3 +
 5 files changed, 230 insertions(+)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a18318..c3fd3a7cb7876 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -249,3 +249,139 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 {
 	return btf_dumper_do_type(d, type_id, 0, data);
 }
+
+#define BTF_PRINT_ARG(...)						\
+	do {								\
+		pos += snprintf(func_sig + pos, size - pos,		\
+				__VA_ARGS__);				\
+		if (pos >= size)					\
+			return -1;					\
+	} while (0)
+#define BTF_PRINT_TYPE(type)					\
+	do {								\
+		pos = __btf_dumper_type_only(btf, type, func_sig,	\
+					     pos, size);		\
+		if (pos == -1)						\
+			return -1;					\
+	} while (0)
+
+static int btf_dump_func(const struct btf *btf, char *func_sig,
+			 const struct btf_type *func_proto,
+			 const struct btf_type *func, int pos, int size);
+
+static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
+				  char *func_sig, int pos, int size)
+{
+	const struct btf_type *proto_type;
+	const struct btf_array *array;
+	const struct btf_type *t;
+
+	if (!type_id) {
+		BTF_PRINT_ARG("void ");
+		return pos;
+	}
+
+	t = btf__type_by_id(btf, type_id);
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+		BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_STRUCT:
+		BTF_PRINT_ARG("struct %s ",
+			      btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_UNION:
+		BTF_PRINT_ARG("union %s ",
+			      btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_ENUM:
+		BTF_PRINT_ARG("enum %s ",
+			      btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_ARRAY:
+		array = (struct btf_array *)(t + 1);
+		BTF_PRINT_TYPE(array->type);
+		BTF_PRINT_ARG("[%d]", array->nelems);
+		break;
+	case BTF_KIND_PTR:
+		BTF_PRINT_TYPE(t->type);
+		BTF_PRINT_ARG("* ");
+		break;
+	case BTF_KIND_UNKN:
+	case BTF_KIND_FWD:
+	case BTF_KIND_TYPEDEF:
+		return -1;
+	case BTF_KIND_VOLATILE:
+		BTF_PRINT_ARG("volatile ");
+		BTF_PRINT_TYPE(t->type);
+		break;
+	case BTF_KIND_CONST:
+		BTF_PRINT_ARG("const ");
+		BTF_PRINT_TYPE(t->type);
+		break;
+	case BTF_KIND_RESTRICT:
+		BTF_PRINT_ARG("restrict ");
+		BTF_PRINT_TYPE(t->type);
+		break;
+	case BTF_KIND_FUNC_PROTO:
+		pos = btf_dump_func(btf, func_sig, t, NULL, pos, size);
+		if (pos == -1)
+			return -1;
+		break;
+	case BTF_KIND_FUNC:
+		proto_type = btf__type_by_id(btf, t->type);
+		pos = btf_dump_func(btf, func_sig, proto_type, t, pos, size);
+		if (pos == -1)
+			return -1;
+		break;
+	default:
+		return -1;
+	}
+
+	return pos;
+}
+
+static int btf_dump_func(const struct btf *btf, char *func_sig,
+			 const struct btf_type *func_proto,
+			 const struct btf_type *func, int pos, int size)
+{
+	int i, vlen;
+
+	BTF_PRINT_TYPE(func_proto->type);
+	if (func)
+		BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, func->name_off));
+	else
+		BTF_PRINT_ARG("(");
+	vlen = BTF_INFO_VLEN(func_proto->info);
+	for (i = 0; i < vlen; i++) {
+		struct btf_param *arg = &((struct btf_param *)(func_proto + 1))[i];
+
+		if (i)
+			BTF_PRINT_ARG(", ");
+		if (arg->type) {
+			BTF_PRINT_TYPE(arg->type);
+			BTF_PRINT_ARG("%s",
+				      btf__name_by_offset(btf, arg->name_off));
+		} else {
+			BTF_PRINT_ARG("...");
+		}
+	}
+	BTF_PRINT_ARG(")");
+
+	return pos;
+}
+
+void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig,
+			  int size)
+{
+	int err;
+
+	func_sig[0] = '\0';
+	if (!btf)
+		return;
+
+	err = __btf_dumper_type_only(btf, type_id, func_sig, 0, size);
+	if (err < 0)
+		func_sig[0] = '\0';
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 10c6c16fae29a..3e8979567cf1b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -187,6 +187,8 @@ struct btf_dumper {
  */
 int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 		    const void *data);
+void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id,
+			  char *func_only, int size);
 
 struct nlattr;
 struct ifinfomsg;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c176e1aa66fe1..37b1daf19da60 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -47,6 +47,7 @@
 #include <linux/err.h>
 
 #include <bpf.h>
+#include <btf.h>
 #include <libbpf.h>
 
 #include "cfg.h"
@@ -451,14 +452,19 @@ static int do_dump(int argc, char **argv)
 	struct bpf_prog_info info = {};
 	unsigned int *func_lens = NULL;
 	const char *disasm_opt = NULL;
+	unsigned int finfo_rec_size;
 	unsigned int nr_func_ksyms;
 	unsigned int nr_func_lens;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
+	struct btf *btf = NULL;
+	void *func_info = NULL;
+	unsigned int finfo_cnt;
 	unsigned int buf_size;
 	char *filepath = NULL;
 	bool opcodes = false;
 	bool visual = false;
+	char func_sig[1024];
 	unsigned char *buf;
 	__u32 *member_len;
 	__u64 *member_ptr;
@@ -551,6 +557,17 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
+	finfo_cnt = info.func_info_cnt;
+	finfo_rec_size = info.func_info_rec_size;
+	if (finfo_cnt && finfo_rec_size) {
+		func_info = malloc(finfo_cnt * finfo_rec_size);
+		if (!func_info) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
@@ -559,6 +576,9 @@ static int do_dump(int argc, char **argv)
 	info.nr_jited_ksyms = nr_func_ksyms;
 	info.jited_func_lens = ptr_to_u64(func_lens);
 	info.nr_jited_func_lens = nr_func_lens;
+	info.func_info_cnt = finfo_cnt;
+	info.func_info_rec_size = finfo_rec_size;
+	info.func_info = ptr_to_u64(func_info);
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -582,6 +602,18 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.func_info_cnt != finfo_cnt) {
+		p_err("incorrect func_info_cnt %d vs. expected %d",
+		      info.func_info_cnt, finfo_cnt);
+		goto err_free;
+	}
+
+	if (info.func_info_rec_size != finfo_rec_size) {
+		p_err("incorrect func_info_rec_size %d vs. expected %d",
+		      info.func_info_rec_size, finfo_rec_size);
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -590,6 +622,11 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.btf_id && btf_get_from_id(info.btf_id, &btf)) {
+		p_err("failed to get btf");
+		goto err_free;
+	}
+
 	if (filepath) {
 		fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 		if (fd < 0) {
@@ -622,6 +659,7 @@ static int do_dump(int argc, char **argv)
 
 		if (info.nr_jited_func_lens && info.jited_func_lens) {
 			struct kernel_sym *sym = NULL;
+			struct bpf_func_info *record;
 			char sym_name[SYM_MAX_NAME];
 			unsigned char *img = buf;
 			__u64 *ksyms = NULL;
@@ -648,12 +686,25 @@ static int do_dump(int argc, char **argv)
 					strcpy(sym_name, "unknown");
 				}
 
+				if (func_info) {
+					record = func_info + i * finfo_rec_size;
+					btf_dumper_type_only(btf, record->type_id,
+							     func_sig,
+							     sizeof(func_sig));
+				}
+
 				if (json_output) {
 					jsonw_start_object(json_wtr);
+					if (func_info && func_sig[0] != '\0') {
+						jsonw_name(json_wtr, "proto");
+						jsonw_string(json_wtr, func_sig);
+					}
 					jsonw_name(json_wtr, "name");
 					jsonw_string(json_wtr, sym_name);
 					jsonw_name(json_wtr, "insns");
 				} else {
+					if (func_info && func_sig[0] != '\0')
+						printf("%s:\n", func_sig);
 					printf("%s:\n", sym_name);
 				}
 
@@ -682,6 +733,9 @@ static int do_dump(int argc, char **argv)
 		kernel_syms_load(&dd);
 		dd.nr_jited_ksyms = info.nr_jited_ksyms;
 		dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+		dd.btf = btf;
+		dd.func_info = func_info;
+		dd.finfo_rec_size = finfo_rec_size;
 
 		if (json_output)
 			dump_xlated_json(&dd, buf, *member_len, opcodes);
@@ -693,12 +747,14 @@ static int do_dump(int argc, char **argv)
 	free(buf);
 	free(func_ksyms);
 	free(func_lens);
+	free(func_info);
 	return 0;
 
 err_free:
 	free(buf);
 	free(func_ksyms);
 	free(func_lens);
+	free(func_info);
 	return -1;
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 3284759df98ad..e06ac0286a750 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -242,11 +242,15 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 		.cb_imm		= print_imm,
 		.private_data	= dd,
 	};
+	struct bpf_func_info *record;
 	struct bpf_insn *insn = buf;
+	struct btf *btf = dd->btf;
 	bool double_insn = false;
+	char func_sig[1024];
 	unsigned int i;
 
 	jsonw_start_array(json_wtr);
+	record = dd->func_info;
 	for (i = 0; i < len / sizeof(*insn); i++) {
 		if (double_insn) {
 			double_insn = false;
@@ -255,6 +259,20 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
 
 		jsonw_start_object(json_wtr);
+
+		if (btf && record) {
+			if (record->insn_offset == i) {
+				btf_dumper_type_only(btf, record->type_id,
+						     func_sig,
+						     sizeof(func_sig));
+				if (func_sig[0] != '\0') {
+					jsonw_name(json_wtr, "proto");
+					jsonw_string(json_wtr, func_sig);
+				}
+				record = (void *)record + dd->finfo_rec_size;
+			}
+		}
+
 		jsonw_name(json_wtr, "disasm");
 		print_bpf_insn(&cbs, insn + i, true);
 
@@ -297,16 +315,31 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
 		.cb_imm		= print_imm,
 		.private_data	= dd,
 	};
+	struct bpf_func_info *record;
 	struct bpf_insn *insn = buf;
+	struct btf *btf = dd->btf;
 	bool double_insn = false;
+	char func_sig[1024];
 	unsigned int i;
 
+	record = dd->func_info;
 	for (i = 0; i < len / sizeof(*insn); i++) {
 		if (double_insn) {
 			double_insn = false;
 			continue;
 		}
 
+		if (btf && record) {
+			if (record->insn_offset == i) {
+				btf_dumper_type_only(btf, record->type_id,
+						     func_sig,
+						     sizeof(func_sig));
+				if (func_sig[0] != '\0')
+					printf("%s:\n", func_sig);
+				record = (void *)record + dd->finfo_rec_size;
+			}
+		}
+
 		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
 
 		printf("% 4d: ", i);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 33d86e2b369b7..aec31723e1e5d 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -51,6 +51,9 @@ struct dump_data {
 	__u32 sym_count;
 	__u64 *jited_ksyms;
 	__u32 nr_jited_ksyms;
+	struct btf *btf;
+	void *func_info;
+	__u32 finfo_rec_size;
 	char scratch_buff[SYM_MAX_NAME + 8];
 };
 

From f6161a8f3036caa45f225486be39783e99e0fa29 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 20 Nov 2018 14:08:20 -0800
Subject: [PATCH 55/71] bpf: fix a compilation error when CONFIG_BPF_SYSCALL is
 not defined

Kernel test robot (lkp@intel.com) reports a compilation error at
  https://www.spinics.net/lists/netdev/msg534913.html
introduced by commit 838e96904ff3 ("bpf: Introduce bpf_func_info").

If CONFIG_BPF is defined and CONFIG_BPF_SYSCALL is not defined,
the following error will appear:
  kernel/bpf/core.c:414: undefined reference to `btf_type_by_id'
  kernel/bpf/core.c:415: undefined reference to `btf_name_by_offset'

When CONFIG_BPF_SYSCALL is not defined,
let us define stub inline functions for btf_type_by_id()
and btf_name_by_offset() in include/linux/btf.h.
This way, the compilation failure can be avoided.

Fixes: 838e96904ff3 ("bpf: Introduce bpf_func_info")
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 7f2c0a4a45ea6..8c2199b5d2503 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -46,7 +46,21 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
+
+#ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
+#else
+static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
+						    u32 type_id)
+{
+	return NULL;
+}
+static inline const char *btf_name_by_offset(const struct btf *btf,
+					     u32 offset)
+{
+	return NULL;
+}
+#endif
 
 #endif

From 462c124c590fe633564192dbfa26e99af788a67c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 21 Nov 2018 11:22:42 -0800
Subject: [PATCH 56/71] bpf: fix a libbpf loader issue

Commit 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections")
added support to read .BTF.ext sections from an object file, create
and pass prog_btf_fd and func_info to the kernel.

The program btf_fd (prog->btf_fd) is initialized to be -1 to please
zclose so we do not need special handling during prog close.
Passing -1 to the kernel, however, will cause loading error.
Passing btf_fd 0 to the kernel if prog->btf_fd is invalid
fixed the problem.

Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections")
Reported-by: Andrey Ignatov <rdna@fb.com>
Reported-by: Emre Cantimur <haydum@fb.com>
Tested-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index cb6565d796034..f022ac82e882d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1387,7 +1387,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;
 	load_attr.prog_ifindex = prog->prog_ifindex;
-	load_attr.prog_btf_fd = prog->btf_fd;
+	load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
 	load_attr.func_info = prog->func_info;
 	load_attr.func_info_rec_size = prog->func_info_rec_size;
 	load_attr.func_info_cnt = func_info_cnt;

From 8c4905b995c649ac71e21611abc2fcefc904b56a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 21 Nov 2018 09:29:44 -0800
Subject: [PATCH 57/71] libbpf: make sure bpf headers are c++ include-able

Wrap headers in extern "C", to turn off C++ mangling.
This simplifies including libbpf in c++ and linking against it.

v2 changes:
* do the same for btf.h

v3 changes:
* test_libbpf.cpp to test for possible future c++ breakages

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile        | 15 ++++++++++++---
 tools/lib/bpf/bpf.h           |  9 +++++++++
 tools/lib/bpf/btf.h           |  8 ++++++++
 tools/lib/bpf/libbpf.h        |  9 +++++++++
 tools/lib/bpf/test_libbpf.cpp | 18 ++++++++++++++++++
 5 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 tools/lib/bpf/test_libbpf.cpp

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 425b480bda752..1b4a683a00fc4 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -66,7 +66,7 @@ ifndef VERBOSE
 endif
 
 FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
+FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx
 FEATURE_DISPLAY = libelf bpf
 
 INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
@@ -148,6 +148,12 @@ LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
 
 CMD_TARGETS = $(LIB_FILE)
 
+CXX_TEST_TARGET = $(OUTPUT)test_libbpf
+
+ifeq ($(feature-cxx), 1)
+	CMD_TARGETS += $(CXX_TEST_TARGET)
+endif
+
 TARGETS = $(CMD_TARGETS)
 
 all: fixdep all_cmd
@@ -175,6 +181,9 @@ $(OUTPUT)libbpf.so: $(BPF_IN)
 $(OUTPUT)libbpf.a: $(BPF_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
+$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
+	$(QUIET_LINK)$(CXX) $^ -lelf -o $@
+
 define do_install
 	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
@@ -201,8 +210,8 @@ config-clean:
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
 clean:
-	$(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
-		$(RM) LIBBPF-CFLAGS
+	$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
+		*.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS
 	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 8bdfd806253ae..09e8bbe111d4c 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -27,6 +27,10 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef LIBBPF_API
 #define LIBBPF_API __attribute__((visibility("default")))
 #endif
@@ -132,4 +136,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
 #endif /* __LIBBPF_BPF_H */
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 386b2ffc32a3e..701ad2b6c41fe 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -6,6 +6,10 @@
 
 #include <linux/types.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef LIBBPF_API
 #define LIBBPF_API __attribute__((visibility("default")))
 #endif
@@ -80,4 +84,8 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext,
 		   const char *sec_name, __u32 insns_cnt, void **func_info,
 		   __u32 *func_info_len);
 
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
 #endif /* __LIBBPF_BTF_H */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index b1686a7871022..74e57e041705a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -16,6 +16,10 @@
 #include <sys/types.h>  // for size_t
 #include <linux/bpf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef LIBBPF_API
 #define LIBBPF_API __attribute__((visibility("default")))
 #endif
@@ -335,4 +339,9 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
 			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
 int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
 			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
 #endif /* __LIBBPF_LIBBPF_H */
diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp
new file mode 100644
index 0000000000000..abf3fc25c9fa3
--- /dev/null
+++ b/tools/lib/bpf/test_libbpf.cpp
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+
+/* do nothing, just make sure we can link successfully */
+
+int main(int argc, char *argv[])
+{
+    /* libbpf.h */
+    libbpf_set_print(NULL, NULL, NULL);
+
+    /* bpf.h */
+    bpf_prog_get_fd_by_id(0);
+
+    /* btf.h */
+    btf__new(NULL, 0, NULL);
+}

From 47eff61777c7b2db58805f974994713c8acbe9a6 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 20 Nov 2018 17:11:19 -0800
Subject: [PATCH 58/71] bpf, libbpf: introduce bpf_object__probe_caps to test
 BPF capabilities

It currently only checks whether kernel supports map/prog names.
This capability check will be used in the next two commits to
skip setting prog/map names.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 58 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f022ac82e882d..dffdd68b5e6be 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/filter.h>
 #include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
@@ -114,6 +115,11 @@ void libbpf_set_print(libbpf_print_fn_t warn,
 # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+struct bpf_capabilities {
+	/* v4.14: kernel support for program & map names. */
+	__u32 name:1;
+};
+
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
@@ -160,6 +166,8 @@ struct bpf_program {
 	void *func_info;
 	__u32 func_info_rec_size;
 	__u32 func_info_len;
+
+	struct bpf_capabilities *caps;
 };
 
 struct bpf_map {
@@ -221,6 +229,8 @@ struct bpf_object {
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
 
+	struct bpf_capabilities caps;
+
 	char path[];
 };
 #define obj_elf_valid(o)	((o)->efile.elf)
@@ -342,6 +352,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
 	if (err)
 		return err;
 
+	prog.caps = &obj->caps;
 	progs = obj->programs;
 	nr_progs = obj->nr_programs;
 
@@ -1135,6 +1146,52 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 	return -errno;
 }
 
+static int
+bpf_object__probe_name(struct bpf_object *obj)
+{
+	struct bpf_load_program_attr attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret;
+
+	/* make sure basic loading works */
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&attr, NULL, 0);
+	if (ret < 0) {
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
+			   __func__, cp, errno);
+		return -errno;
+	}
+	close(ret);
+
+	/* now try the same program, but with the name */
+
+	attr.name = "test";
+	ret = bpf_load_program_xattr(&attr, NULL, 0);
+	if (ret >= 0) {
+		obj->caps.name = 1;
+		close(ret);
+	}
+
+	return 0;
+}
+
+static int
+bpf_object__probe_caps(struct bpf_object *obj)
+{
+	return bpf_object__probe_name(obj);
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
@@ -1708,6 +1765,7 @@ int bpf_object__load(struct bpf_object *obj)
 
 	obj->loaded = true;
 
+	CHECK_ERR(bpf_object__probe_caps(obj), err, out);
 	CHECK_ERR(bpf_object__create_maps(obj), err, out);
 	CHECK_ERR(bpf_object__relocate(obj), err, out);
 	CHECK_ERR(bpf_object__load_progs(obj), err, out);

From 94cb310cfaa16582cd49ebbeea5925e8f49324a1 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 20 Nov 2018 17:11:20 -0800
Subject: [PATCH 59/71] bpf: libbpf: remove map name retry from
 bpf_create_map_xattr

Instead, check for a newly created caps.name bpf_object capability.
If kernel doesn't support names, don't specify the attribute.

See commit 23499442c319 ("bpf: libbpf: retry map creation without
the name") for rationale.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c    | 11 +----------
 tools/lib/bpf/libbpf.c |  3 ++-
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 836447bb4f143..ce18221945900 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,7 +69,6 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
 	__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
 	union bpf_attr attr;
-	int ret;
 
 	memset(&attr, '\0', sizeof(attr));
 
@@ -87,15 +86,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 	attr.map_ifindex = create_attr->map_ifindex;
 	attr.inner_map_fd = create_attr->inner_map_fd;
 
-	ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-	if (ret < 0 && errno == EINVAL && create_attr->name) {
-		/* Retry the same syscall, but without the name.
-		 * Pre v4.14 kernels don't support map names.
-		 */
-		memset(attr.map_name, 0, sizeof(attr.map_name));
-		return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-	}
-	return ret;
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index dffdd68b5e6be..f28e64dd8b5a5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1211,7 +1211,8 @@ bpf_object__create_maps(struct bpf_object *obj)
 			continue;
 		}
 
-		create_attr.name = map->name;
+		if (obj->caps.name)
+			create_attr.name = map->name;
 		create_attr.map_ifindex = map->map_ifindex;
 		create_attr.map_type = def->type;
 		create_attr.map_flags = def->map_flags;

From 5b32a23e1d879b78ee72144311314eccf7581bf4 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 20 Nov 2018 17:11:21 -0800
Subject: [PATCH 60/71] bpf: libbpf: don't specify prog name if kernel doesn't
 support it

Use recently added capability check.

See commit 23499442c319 ("bpf: libbpf: retry map creation without
the name") for rationale.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f28e64dd8b5a5..edbae2b1b046a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1439,7 +1439,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
 	load_attr.expected_attach_type = prog->expected_attach_type;
-	load_attr.name = prog->name;
+	if (prog->caps->name)
+		load_attr.name = prog->name;
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;

From addb9fc90f13898e7779da54f471792e3dfb0d55 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Tue, 20 Nov 2018 20:55:56 -0800
Subject: [PATCH 61/71] bpf: adding support for map in map in libbpf

The idea is pretty simple: for a specified map (pointed to by struct bpf_map)
we provide the descriptor of an already loaded map, which is going to be
used as a prototype for the inner map. Proposed workflow:
1) open the bpf object (bpf_object__open)
2) create the bpf map which is going to be used as a prototype
3) find (by name) the map-in-map which you want to load and update it with
the descriptor of the inner map using the new helper from this patch
4) load the bpf program with bpf_object__load

Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 40 ++++++++++++++++++++++++++++++++++------
 tools/lib/bpf/libbpf.h |  2 ++
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index edbae2b1b046a..0f14f7c074c29 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -175,6 +175,7 @@ struct bpf_map {
 	char *name;
 	size_t offset;
 	int map_ifindex;
+	int inner_map_fd;
 	struct bpf_map_def def;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
@@ -605,6 +606,14 @@ static int compare_bpf_map(const void *_a, const void *_b)
 	return a->offset - b->offset;
 }
 
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
+{
+	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		return true;
+	return false;
+}
+
 static int
 bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
@@ -668,13 +677,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	}
 	obj->nr_maps = nr_maps;
 
-	/*
-	 * fill all fd with -1 so won't close incorrect
-	 * fd (fd=0 is stdin) when failure (zclose won't close
-	 * negative fd)).
-	 */
-	for (i = 0; i < nr_maps; i++)
+	for (i = 0; i < nr_maps; i++) {
+		/*
+		 * fill all fd with -1 so won't close incorrect
+		 * fd (fd=0 is stdin) when failure (zclose won't close
+		 * negative fd)).
+		 */
 		obj->maps[i].fd = -1;
+		obj->maps[i].inner_map_fd = -1;
+	}
 
 	/*
 	 * Fill obj->maps using data in "maps" section.
@@ -1222,6 +1233,9 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
+		if (bpf_map_type__is_map_in_map(def->type) &&
+		    map->inner_map_fd >= 0)
+			create_attr.inner_map_fd = map->inner_map_fd;
 
 		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
 			create_attr.btf_fd = btf__fd(obj->btf);
@@ -2681,6 +2695,20 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 	map->map_ifindex = ifindex;
 }
 
+int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
+{
+	if (!bpf_map_type__is_map_in_map(map->def.type)) {
+		pr_warning("error: unsupported map type\n");
+		return -EINVAL;
+	}
+	if (map->inner_map_fd != -1) {
+		pr_warning("error: inner_map_fd already specified\n");
+		return -EINVAL;
+	}
+	map->inner_map_fd = fd;
+	return 0;
+}
+
 static struct bpf_map *
 __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
 {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 74e57e041705a..f30c3d07bb7db 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -297,6 +297,8 @@ LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
 
+LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+
 LIBBPF_API long libbpf_get_error(const void *ptr);
 
 struct bpf_prog_load_attr {

From b1957c92eba5acad7d8a0a6f968561e08714e978 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Tue, 20 Nov 2018 20:55:57 -0800
Subject: [PATCH 62/71] bpf: adding tests for map_in_map helper in libbpf

adding test/example of bpf_map__set_inner_map_fd usage

Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile          |  3 +-
 tools/testing/selftests/bpf/test_map_in_map.c | 49 ++++++++++
 tools/testing/selftests/bpf/test_maps.c       | 90 +++++++++++++++++++
 3 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_map_in_map.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1dde03ea14848..43157bd891655 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -38,7 +38,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
 	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-	test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
+	test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \
+	test_map_in_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/test_map_in_map.c
new file mode 100644
index 0000000000000..ce923e67e08e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_map_in_map.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") mim_array = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(int),
+	/* must be sizeof(__u32) for map in map */
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+	.map_flags = 0,
+};
+
+struct bpf_map_def SEC("maps") mim_hash = {
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+	.key_size = sizeof(int),
+	/* must be sizeof(__u32) for map in map */
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+	.map_flags = 0,
+};
+
+SEC("xdp_mimtest")
+int xdp_mimtest0(struct xdp_md *ctx)
+{
+	int value = 123;
+	int key = 0;
+	void *map;
+
+	map = bpf_map_lookup_elem(&mim_array, &key);
+	if (!map)
+		return XDP_DROP;
+
+	bpf_map_update_elem(map, &key, &value, 0);
+
+	map = bpf_map_lookup_elem(&mim_hash, &key);
+	if (!map)
+		return XDP_DROP;
+
+	bpf_map_update_elem(map, &key, &value, 0);
+
+	return XDP_PASS;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9f0a5b16a2469..9c79ee017df3b 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1125,6 +1125,94 @@ static void test_sockmap(int tasks, void *data)
 	exit(1);
 }
 
+#define MAPINMAP_PROG "./test_map_in_map.o"
+static void test_map_in_map(void)
+{
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int mim_fd, fd, err;
+	int pos = 0;
+
+	obj = bpf_object__open(MAPINMAP_PROG);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int),
+			    2, 0);
+	if (fd < 0) {
+		printf("Failed to create hashmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	map = bpf_object__find_map_by_name(obj, "mim_array");
+	if (IS_ERR(map)) {
+		printf("Failed to load array of maps from test prog\n");
+		goto out_map_in_map;
+	}
+	err = bpf_map__set_inner_map_fd(map, fd);
+	if (err) {
+		printf("Failed to set inner_map_fd for array of maps\n");
+		goto out_map_in_map;
+	}
+
+	map = bpf_object__find_map_by_name(obj, "mim_hash");
+	if (IS_ERR(map)) {
+		printf("Failed to load hash of maps from test prog\n");
+		goto out_map_in_map;
+	}
+	err = bpf_map__set_inner_map_fd(map, fd);
+	if (err) {
+		printf("Failed to set inner_map_fd for hash of maps\n");
+		goto out_map_in_map;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		bpf_program__set_xdp(prog);
+	}
+	bpf_object__load(obj);
+
+	map = bpf_object__find_map_by_name(obj, "mim_array");
+	if (IS_ERR(map)) {
+		printf("Failed to load array of maps from test prog\n");
+		goto out_map_in_map;
+	}
+	mim_fd = bpf_map__fd(map);
+	if (mim_fd < 0) {
+		printf("Failed to get descriptor for array of maps\n");
+		goto out_map_in_map;
+	}
+
+	err = bpf_map_update_elem(mim_fd, &pos, &fd, 0);
+	if (err) {
+		printf("Failed to update array of maps\n");
+		goto out_map_in_map;
+	}
+
+	map = bpf_object__find_map_by_name(obj, "mim_hash");
+	if (IS_ERR(map)) {
+		printf("Failed to load hash of maps from test prog\n");
+		goto out_map_in_map;
+	}
+	mim_fd = bpf_map__fd(map);
+	if (mim_fd < 0) {
+		printf("Failed to get descriptor for hash of maps\n");
+		goto out_map_in_map;
+	}
+
+	err = bpf_map_update_elem(mim_fd, &pos, &fd, 0);
+	if (err) {
+		printf("Failed to update hash of maps\n");
+		goto out_map_in_map;
+	}
+
+	close(fd);
+	bpf_object__close(obj);
+	return;
+
+out_map_in_map:
+	close(fd);
+	exit(1);
+}
+
 #define MAP_SIZE (32 * 1024)
 
 static void test_map_large(void)
@@ -1600,6 +1688,8 @@ static void run_all_tests(void)
 
 	test_queuemap(0, NULL);
 	test_stackmap(0, NULL);
+
+	test_map_in_map();
 }
 
 int main(void)

From 8d75839b843ae0ef8d9db97ed05b493e687e6b75 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 21 Nov 2018 21:39:52 -0800
Subject: [PATCH 63/71] bpf, lpm: make longest_prefix_match() faster

At LPC 2018 in Vancouver, Vlad Dumitrescu mentioned that longest_prefix_match()
has a high cost [1].

One reason for that cost is a loop handling one byte at a time.

We can handle more bytes at a time, if enough attention is paid
to endianness.

I was able to remove ~55 % of longest_prefix_match() cpu costs.

[1] https://linuxplumbersconf.org/event/2/contributions/88/attachments/76/87/lpc-bpf-2018-shaping.pdf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Vlad Dumitrescu <vladum@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/lpm_trie.c | 59 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 9058317ba9de2..bfd4882e1106c 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -168,20 +168,59 @@ static size_t longest_prefix_match(const struct lpm_trie *trie,
 				   const struct lpm_trie_node *node,
 				   const struct bpf_lpm_trie_key *key)
 {
-	size_t prefixlen = 0;
-	size_t i;
+	u32 limit = min(node->prefixlen, key->prefixlen);
+	u32 prefixlen = 0, i = 0;
 
-	for (i = 0; i < trie->data_size; i++) {
-		size_t b;
+	BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
+	BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
 
-		b = 8 - fls(node->data[i] ^ key->data[i]);
-		prefixlen += b;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
 
-		if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
-			return min(node->prefixlen, key->prefixlen);
+	/* data_size >= 16 has very small probability.
+	 * We do not use a loop for optimal code generation.
+	 */
+	if (trie->data_size >= 8) {
+		u64 diff = be64_to_cpu(*(__be64 *)node->data ^
+				       *(__be64 *)key->data);
+
+		prefixlen = 64 - fls64(diff);
+		if (prefixlen >= limit)
+			return limit;
+		if (diff)
+			return prefixlen;
+		i = 8;
+	}
+#endif
+
+	while (trie->data_size >= i + 4) {
+		u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^
+				       *(__be32 *)&key->data[i]);
+
+		prefixlen += 32 - fls(diff);
+		if (prefixlen >= limit)
+			return limit;
+		if (diff)
+			return prefixlen;
+		i += 4;
+	}
 
-		if (b < 8)
-			break;
+	if (trie->data_size >= i + 2) {
+		u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^
+				       *(__be16 *)&key->data[i]);
+
+		prefixlen += 16 - fls(diff);
+		if (prefixlen >= limit)
+			return limit;
+		if (diff)
+			return prefixlen;
+		i += 2;
+	}
+
+	if (trie->data_size >= i + 1) {
+		prefixlen += 8 - fls(node->data[i] ^ key->data[i]);
+
+		if (prefixlen >= limit)
+			return limit;
 	}
 
 	return prefixlen;

From ab85b0143428739cb0f2533182d5824375b6b66b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 22 Nov 2018 10:13:45 +0000
Subject: [PATCH 64/71] tools/bpf: fix spelling mistake "memeory" -> "memory"

The CHECK message contains a spelling mistake, fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_btf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 7b1b160d6e67b..bcbda7037840e 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2573,7 +2573,7 @@ static int do_test_file(unsigned int test_num)
 	}
 
 	func_info = malloc(info.func_info_cnt * rec_size);
-	if (CHECK(!func_info, "out of memeory")) {
+	if (CHECK(!func_info, "out of memory")) {
 		err = -1;
 		goto done;
 	}
@@ -3299,7 +3299,7 @@ static int do_test_func_type(int test_num)
 	}
 
 	func_info = malloc(info.func_info_cnt * rec_size);
-	if (CHECK(!func_info, "out of memeory")) {
+	if (CHECK(!func_info, "out of memory")) {
 		err = -1;
 		goto done;
 	}

From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001
From: Vlad Dumitrescu <vladum@google.com>
Date: Thu, 22 Nov 2018 14:39:16 -0500
Subject: [PATCH 65/71] bpf: add skb->tstamp r/w access from tc clsact and cg
 skb progs

This could be used to rate limit egress traffic in concert with a qdisc
which supports Earliest Departure Time, such as FQ.

Write access from cg skb progs only with CAP_SYS_ADMIN, since the value
will be used by downstream qdiscs. It might make sense to relax this.

Changes v1 -> v2:
  - allow access from cg skb, write only with CAP_SYS_ADMIN

Signed-off-by: Vlad Dumitrescu <vladum@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 29 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++
 4 files changed, 60 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index f6ca38a7d4332..65dc13aeca7c4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(struct bpf_flow_keys *))
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (size != sizeof(__u64))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size,
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
+		case bpf_ctx_range(struct __sk_buff, tstamp):
+			if (!capable(CAP_SYS_ADMIN))
+				return false;
+			break;
 		default:
 			return false;
 		}
@@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		case bpf_ctx_range(struct __sk_buff, priority):
 		case bpf_ctx_range(struct __sk_buff, tc_classid):
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		case bpf_ctx_range(struct __sk_buff, tstamp):
 			break;
 		default:
 			return false;
@@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
 
@@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
 				      si->src_reg, off);
 		break;
+
+	case offsetof(struct __sk_buff, tstamp):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 54d16fbdef8b9..537a8f91af02d 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, tc_index)),
 			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
 				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr_unpriv = "",
@@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R2 leaks addr into helper function",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
+	{
+		"write tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "invalid bpf_context access off=152 size=8",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"read tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
 	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {

From 47ae7e3d0be539c6af83b7b349ece31a18001aa6 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Fri, 23 Nov 2018 12:58:12 -0800
Subject: [PATCH 66/71] libbpf: make bpf_object__open default to UNSPEC

Currently, libbpf's bpf_object__open requires by default that a
BPF program specify a version in its code, because of two things:
1) the default prog type is set to KPROBE
2) KPROBE requires (in kernel/bpf/syscall.c) the version to be specified

In this patch I'm changing the default prog type to UNSPEC and also changing
the requirements for the version section to be present in the object file.
Now it reflects what we have today in the kernel
(only the KPROBE prog type requires the version to be explicitly set).

v1 -> v2:
 - RFC tag has been dropped

Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0f14f7c074c29..ed4212a4c5f96 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -333,7 +333,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 	prog->idx = idx;
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
-	prog->type = BPF_PROG_TYPE_KPROBE;
+	prog->type = BPF_PROG_TYPE_UNSPEC;
 	prog->btf_fd = -1;
 
 	return 0;
@@ -1649,12 +1649,12 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
-		return false;
 	case BPF_PROG_TYPE_UNSPEC:
-	case BPF_PROG_TYPE_KPROBE:
 	case BPF_PROG_TYPE_TRACEPOINT:
-	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+		return false;
+	case BPF_PROG_TYPE_KPROBE:
 	default:
 		return true;
 	}

From 5a863813216ce79e16a8c1503b2543c528b778b6 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Fri, 23 Nov 2018 07:14:32 +0900
Subject: [PATCH 67/71] samples: bpf: fix: error handling regarding
 kprobe_events

Currently, kprobe_events failure won't be handled properly.
Due to calling system() indirectly to write to kprobe_events,
it can't be identified whether an error is derived from kprobe or system.

    // buf = "echo '%c:%s %s' >> /s/k/d/t/kprobe_events"
    err = system(buf);
    if (err < 0) {
        printf("failed to create kprobe ..");
        return -1;
    }

For example, running ./tracex7 sample in ext4 partition,
"echo p:open_ctree open_ctree >> /s/k/d/t/kprobe_events"
gets 256 error code system() failure.
=> The error comes from kprobe, but it's not handled correctly.

According to the system(3) man page, its return value
is just the termination status of the child shell; a failing
command is not reported as -1. (Success is not of interest here.)

This means the current check does not work as intended
(see the code snippet above).

    ex) running ./tracex7 with ext4 env.
    # Current Output
    sh: echo: I/O error
    failed to open event open_ctree

    # Desired Output
    failed to create kprobe 'open_ctree' error 'No such file or directory'

The problem is that it can't be determined whether an error came from the
child process or from system() itself. Using write() directly makes the
command failure detectable, since every error is reported as -1. So I suggest
writing to 'kprobe_events' with write() directly rather than calling system().

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 5c052b9ea63fe..434ea34a59549 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -54,6 +54,23 @@ static int populate_prog_array(const char *event, int prog_fd)
 	return 0;
 }
 
+/* Write val to kprobe_events ("" truncates the file, else append); -1 on error. */
+static int write_kprobe_events(const char *val)
+{
+	int fd, ret;
+
+	if (val == NULL)	/* strlen(NULL) below would crash */
+		return -1;
+
+	fd = open("/sys/kernel/debug/tracing/kprobe_events",
+		  O_WRONLY | (val[0] == '\0' ? O_TRUNC : O_APPEND));
+	if (fd < 0)
+		return -1;	/* keep open()'s errno for the caller's strerror() */
+	ret = write(fd, val, strlen(val));
+	close(fd);
+	return ret;
+}
+
 static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 {
 	bool is_socket = strncmp(event, "socket", 6) == 0;
@@ -165,10 +182,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
 #ifdef __x86_64__
 		if (strncmp(event, "sys_", 4) == 0) {
-			snprintf(buf, sizeof(buf),
-				 "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events",
-				 is_kprobe ? 'p' : 'r', event, event);
-			err = system(buf);
+			snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s",
+				is_kprobe ? 'p' : 'r', event, event);
+			err = write_kprobe_events(buf);
 			if (err >= 0) {
 				need_normal_check = false;
 				event_prefix = "__x64_";
@@ -176,10 +192,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		}
 #endif
 		if (need_normal_check) {
-			snprintf(buf, sizeof(buf),
-				 "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
-				 is_kprobe ? 'p' : 'r', event, event);
-			err = system(buf);
+			snprintf(buf, sizeof(buf), "%c:%s %s",
+				is_kprobe ? 'p' : 'r', event, event);
+			err = write_kprobe_events(buf);
 			if (err < 0) {
 				printf("failed to create kprobe '%s' error '%s'\n",
 				       event, strerror(errno));
@@ -519,7 +534,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		return 1;
 
 	/* clear all kprobes */
-	i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+	i = write_kprobe_events("");
 
 	/* scan over all elf sections to get license and map info */
 	for (i = 1; i < ehdr.e_shnum; i++) {

From 197c2dac74e4ae3158ad10f848c5402236d6b176 Mon Sep 17 00:00:00 2001
From: David Calavera <david.calavera@gmail.com>
Date: Thu, 22 Nov 2018 12:59:45 -0800
Subject: [PATCH 68/71] bpf: Add BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to
 bpftool-map

I noticed that these two new BPF Maps are not defined in bpftool.
This patch defines those two maps and adds their names to the
bpftool-map documentation.

Signed-off-by: David Calavera <david.calavera@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++-
 tools/bpf/bpftool/map.c                         | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index f55a2daed59b7..9e827e342d9e9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -42,7 +42,8 @@ MAP COMMANDS
 |		| **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
-|		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** }
+|		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
+|		| **queue** | **stack** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a1ae2a3e9fefc..b0ebbed7d1a6f 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -74,6 +74,8 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
 	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
 	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
+	[BPF_MAP_TYPE_QUEUE] = "queue",
+	[BPF_MAP_TYPE_STACK] = "stack",
 };
 
 static bool map_is_per_cpu(__u32 type)

From cf0dd411e80f7066cabf69899724e48dd3192b99 Mon Sep 17 00:00:00 2001
From: Rustam Kovhaev <rkovhaev@gmail.com>
Date: Fri, 23 Nov 2018 15:48:16 -0800
Subject: [PATCH 69/71] bpf, tags: Fix DEFINE_PER_CPU expansion

Building tags produces a warning:

  ctags: Warning: kernel/bpf/local_storage.c:10: null expansion of name pattern "\1"

Let's use the same fix as in commit 25528213fe9f ("tags: Fix DEFINE_PER_CPU
expansions"), even though it violates the usual code style.

Signed-off-by: Rustam Kovhaev <rkovhaev@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/local_storage.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c97a8f968638c..9e94b1cc6cf23 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,8 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage*,
-	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #ifdef CONFIG_CGROUP_BPF
 

From 311fe1a813324ea6d8172a3e9eefb1b274c72fea Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 25 Nov 2018 23:32:51 +0000
Subject: [PATCH 70/71] bpf: btf: fix spelling mistake "Memmber" -> "Member"

There is a spelling mistake in a btf_verifier_log_member message,
fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 69da9169819ad..a09b2f94ab25b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1621,7 +1621,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
 
 		if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) {
 			btf_verifier_log_member(env, t, member,
-						"Memmber bits_offset exceeds its struct size");
+						"Member bits_offset exceeds its struct size");
 			return -EINVAL;
 		}
 

From ffac28f95a98a87db0850801cd98771a08bb1dec Mon Sep 17 00:00:00 2001
From: David Calavera <david.calavera@gmail.com>
Date: Fri, 23 Nov 2018 15:58:39 -0800
Subject: [PATCH 71/71] bpf: align map type names formatting.

Make the formatting for map_type_name array consistent.

Signed-off-by: David Calavera <david.calavera@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 46 ++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b0ebbed7d1a6f..cbd3080e72c77 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -52,30 +52,30 @@
 #include "main.h"
 
 static const char * const map_type_name[] = {
-	[BPF_MAP_TYPE_UNSPEC]		= "unspec",
-	[BPF_MAP_TYPE_HASH]		= "hash",
-	[BPF_MAP_TYPE_ARRAY]		= "array",
-	[BPF_MAP_TYPE_PROG_ARRAY]	= "prog_array",
-	[BPF_MAP_TYPE_PERF_EVENT_ARRAY]	= "perf_event_array",
-	[BPF_MAP_TYPE_PERCPU_HASH]	= "percpu_hash",
-	[BPF_MAP_TYPE_PERCPU_ARRAY]	= "percpu_array",
-	[BPF_MAP_TYPE_STACK_TRACE]	= "stack_trace",
-	[BPF_MAP_TYPE_CGROUP_ARRAY]	= "cgroup_array",
-	[BPF_MAP_TYPE_LRU_HASH]		= "lru_hash",
-	[BPF_MAP_TYPE_LRU_PERCPU_HASH]	= "lru_percpu_hash",
-	[BPF_MAP_TYPE_LPM_TRIE]		= "lpm_trie",
-	[BPF_MAP_TYPE_ARRAY_OF_MAPS]	= "array_of_maps",
-	[BPF_MAP_TYPE_HASH_OF_MAPS]	= "hash_of_maps",
-	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
-	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
-	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
-	[BPF_MAP_TYPE_XSKMAP]           = "xskmap",
-	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",
-	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
-	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+	[BPF_MAP_TYPE_UNSPEC]			= "unspec",
+	[BPF_MAP_TYPE_HASH]			= "hash",
+	[BPF_MAP_TYPE_ARRAY]			= "array",
+	[BPF_MAP_TYPE_PROG_ARRAY]		= "prog_array",
+	[BPF_MAP_TYPE_PERF_EVENT_ARRAY]		= "perf_event_array",
+	[BPF_MAP_TYPE_PERCPU_HASH]		= "percpu_hash",
+	[BPF_MAP_TYPE_PERCPU_ARRAY]		= "percpu_array",
+	[BPF_MAP_TYPE_STACK_TRACE]		= "stack_trace",
+	[BPF_MAP_TYPE_CGROUP_ARRAY]		= "cgroup_array",
+	[BPF_MAP_TYPE_LRU_HASH]			= "lru_hash",
+	[BPF_MAP_TYPE_LRU_PERCPU_HASH]		= "lru_percpu_hash",
+	[BPF_MAP_TYPE_LPM_TRIE]			= "lpm_trie",
+	[BPF_MAP_TYPE_ARRAY_OF_MAPS]		= "array_of_maps",
+	[BPF_MAP_TYPE_HASH_OF_MAPS]		= "hash_of_maps",
+	[BPF_MAP_TYPE_DEVMAP]			= "devmap",
+	[BPF_MAP_TYPE_SOCKMAP]			= "sockmap",
+	[BPF_MAP_TYPE_CPUMAP]			= "cpumap",
+	[BPF_MAP_TYPE_XSKMAP]			= "xskmap",
+	[BPF_MAP_TYPE_SOCKHASH]			= "sockhash",
+	[BPF_MAP_TYPE_CGROUP_STORAGE]		= "cgroup_storage",
+	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]	= "reuseport_sockarray",
 	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
-	[BPF_MAP_TYPE_QUEUE] = "queue",
-	[BPF_MAP_TYPE_STACK] = "stack",
+	[BPF_MAP_TYPE_QUEUE]			= "queue",
+	[BPF_MAP_TYPE_STACK]			= "stack",
 };
 
 static bool map_is_per_cpu(__u32 type)