From 303def35f64e37bcd5401d202889f5fbc0241179 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 17 May 2018 14:16:58 -0700 Subject: [PATCH 1/2] bpf: allow sk_msg programs to read sock fields Currently sk_msg programs only have access to the raw data. However, it is often useful when building policies to have the policies specific to the socket endpoint. This allows using the socket tuple as input into filters, etc. This patch adds ctx access to the sock fields. Signed-off-by: John Fastabend Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 8 +++ kernel/bpf/sockmap.c | 1 + net/core/filter.c | 114 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 121 insertions(+), 3 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 9dbcb9d55921b..d358d1815c165 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -517,6 +517,7 @@ struct sk_msg_buff { bool sg_copy[MAX_SKB_FRAGS]; __u32 flags; struct sock *sk_redir; + struct sock *sk; struct sk_buff *skb; struct list_head list; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94d333a82259..97446bbe2ca52 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2176,6 +2176,14 @@ enum sk_action { struct sk_msg_md { void *data; void *data_end; + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; #define BPF_TAG_SIZE 8 diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c682669570dc4..3b28955a6383e 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk, } bpf_compute_data_pointers_sg(md); + md->sk = sk; rc = (*prog->bpf_func)(md, prog->insnsi); psock->apply_bytes = md->apply_bytes; diff --git a/net/core/filter.c b/net/core/filter.c index 6d0d1560bd70e..aec5ebafb2629 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5148,18 +5148,23 @@ static bool sk_msg_is_valid_access(int off, int size, switch (off) { case offsetof(struct sk_msg_md, data): info->reg_type = PTR_TO_PACKET; + if (size != sizeof(__u64)) + return false; break; case offsetof(struct sk_msg_md, data_end): info->reg_type = PTR_TO_PACKET_END; + if (size != sizeof(__u64)) + return false; break; + default: + if (size != sizeof(__u32)) + return false; } if (off < 0 || off >= sizeof(struct sk_msg_md)) return false; if (off % size != 0) return false; - if (size != sizeof(__u64)) - return false; return true; } @@ -5835,7 +5840,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock_ops, local_ip4): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_rcv_saddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), @@ -6152,6 +6158,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; + int off; switch (si->off) { case offsetof(struct sk_msg_md, data): @@ -6164,6 +6171,107 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct sk_msg_buff, data_end)); break; + case offsetof(struct sk_msg_md, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_family)); + break; + + case offsetof(struct sk_msg_md, remote_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_daddr)); + break; + + case offsetof(struct sk_msg_md, local_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_rcv_saddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_rcv_saddr)); + break; + + case offsetof(struct sk_msg_md, remote_ip6[0]) ... + offsetof(struct sk_msg_md, remote_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct sk_msg_md, remote_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_daddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct sk_msg_md, local_ip6[0]) ... + offsetof(struct sk_msg_md, local_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct sk_msg_md, local_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct sk_msg_md, remote_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_dport)); +#ifndef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); +#endif + break; + + case offsetof(struct sk_msg_md, local_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct sk_msg_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_num)); + break; } return insn - insn_buf; From 4da0dcabe4bc633ffc13bb4a669c34dd876e59d1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 17 May 2018 14:17:03 -0700 Subject: [PATCH 2/2] bpf: add sk_msg prog sk access tests to test_verifier Add tests for BPF_PROG_TYPE_SK_MSG to test_verifier for read access to new sk fields. Signed-off-by: John Fastabend Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 8 ++ tools/testing/selftests/bpf/test_verifier.c | 115 ++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d94d333a82259..97446bbe2ca52 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2176,6 +2176,14 @@ enum sk_action { struct sk_msg_md { void *data; void *data_end; + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; #define BPF_TAG_SIZE 8 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a877af00605d1..6ec4d9def877f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1685,6 +1685,121 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SK_SKB, }, + { + "valid access family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access local_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "invalid 64B read of family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read past end of SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, local_port) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read offset in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, { "direct packet read for SK_MSG", .insns = {