From 36641ad61db5ce9befd5eb0071abb36eaff16cfc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:43 +0200 Subject: [PATCH 1/7] bpf: fix test suite to enable all unpriv program types Given BPF_PROG_TYPE_CGROUP_SKB program types are also valid in an unprivileged setting, lets not omit these tests and potentially have issues fall through the cracks. Make this more obvious by adding a small test_as_unpriv() helper. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 769d68a48f303..8e1a79d2792c4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4891,6 +4891,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R3 pointer comparison prohibited", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -5146,6 +5148,7 @@ static struct bpf_test tests[] = { .fixup_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -5261,6 +5264,7 @@ static struct bpf_test tests[] = { .fixup_percpu_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -14050,6 +14054,13 @@ static void get_unpriv_disabled() fclose(fd); } +static bool test_as_unpriv(struct bpf_test *test) +{ + return !test->prog_type || + test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || + test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0, skips = 0; @@ -14060,10 +14071,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type && unpriv_disabled) { + if (test_as_unpriv(test) && unpriv_disabled) { printf("#%d/u %s SKIP\n", i, test->descr); skips++; - } else if (!test->prog_type) { + } else if (test_as_unpriv(test)) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr); From ab21c1b5f799395232b838e98981cfed6d647905 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:44 +0200 Subject: [PATCH 2/7] bpf: disallow direct packet access for unpriv in cg_skb Commit b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") added support for returning pkt pointers for direct packet access. Given this program type is allowed for both unprivileged and privileged users, we shouldn't allow unprivileged ones to use it, e.g. besides others one reason would be to avoid any potential speculation on the packet test itself, thus guard this for root only. Fixes: b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Song Liu Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 6 ++++++ tools/testing/selftests/bpf/test_verifier.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 35c6933c26229..3fdddfa9a0fd9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5496,7 +5496,13 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): return false; + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8e1a79d2792c4..36f3d3009d1a0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4892,7 +4892,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R3 pointer comparison prohibited", + .errstr_unpriv = "invalid bpf_context access off=76 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { From 5d66fa7d9e9e9399ddfdc530f352dd6f7c724485 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:45 +0200 Subject: [PATCH 3/7] bpf: fix direct packet access for flow dissector progs Commit d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") added direct packet access for skbs in may_access_direct_pkt_data() function where this enables read and write access to the skb->data. This is buggy because without a prologue generator such as bpf_unclone_prologue() we would allow for writing into cloned skbs. Original intention might have been to only allow read access where this is not needed (similar as the flow_dissector_func_proto() indicates which enables only bpf_skb_load_bytes() as well), therefore this patch fixes it to restrict to read-only. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Petar Penkov Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98fa0be35370b..b0cc8f2ff95f8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1387,21 +1387,23 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, enum bpf_access_type t) { switch (env->prog->type) { + /* Program types only with direct read access go here! */ case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_REUSEPORT: - /* dst_input() and dst_output() can't write for now */ + case BPF_PROG_TYPE_FLOW_DISSECTOR: if (t == BPF_WRITE) return false; /* fallthrough */ + + /* Program types with direct read + write access go here! */ case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_FLOW_DISSECTOR: if (meta) return meta->pkt_access; From d5563d367c2ce48ea3d675c77f7109f37311943d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:46 +0200 Subject: [PATCH 4/7] bpf: fix cg_skb types to hint access type in may_access_direct_pkt_data Commit b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") added direct packet access for skbs in cg_skb program types, however allowed access type was not added to the may_access_direct_pkt_data() helper. Therefore the latter always returns false. This is not directly an issue, it just means writes are unconditionally disabled (which is correct) but also reads. Latter is relevant in this function when BPF helpers may read direct packet data which is unconditionally disabled then. Fix it by properly adding BPF_PROG_TYPE_CGROUP_SKB to may_access_direct_pkt_data(). Fixes: b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b0cc8f2ff95f8..5fc9a658af0e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1393,6 +1393,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SKB: if (t == BPF_WRITE) return false; /* fallthrough */ From 80b0d86a176cab6201719b8dfd806902b0c6e046 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:47 +0200 Subject: [PATCH 5/7] bpf: fix direct packet write into pop/peek helpers Commit f1a2e44a3aec ("bpf: add queue and stack maps") probably just copy-pasted .pkt_access for bpf_map_{pop,peek}_elem() helpers, but this is buggy in this context since it would allow writes into cloned skbs which is invalid. Therefore, disable .pkt_access for the two. Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Mauricio Vasquez B Acked-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ab0d5e3f9892b..a74972b07e746 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -99,7 +99,6 @@ BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_pop_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, @@ -113,7 +112,6 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_peek_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, From d3f66e4116aff8dd0d5bd4067295b9ddb5e2c29c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:48 +0200 Subject: [PATCH 6/7] bpf: fix leaking uninitialized memory on pop/peek helpers Commit f1a2e44a3aec ("bpf: add queue and stack maps") added helpers with ARG_PTR_TO_UNINIT_MAP_VALUE. Meaning, the helper is supposed to fill the map value buffer with data instead of reading from it like in other helpers such as map update. However, given the buffer is allowed to be uninitialized (since we fill it in the helper anyway), it also means that the helper is obliged to wipe the memory in case of an error in order to not allow for leaking uninitialized memory. Given pop/peek is both handled inside __{stack,queue}_map_get(), lets wipe it there on error case, that is, empty stack/queue. Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Mauricio Vasquez B Acked-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 12a93fb374492..8bbd72d3a121f 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -122,6 +122,7 @@ static int __queue_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } @@ -151,6 +152,7 @@ static int __stack_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } From b09928b976280d64060d7bee146d7df5c5a29bef Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:49 +0200 Subject: [PATCH 7/7] bpf: make direct packet write unclone more robust Given this seems to be quite fragile and can easily slip through the cracks, lets make direct packet write more robust by requiring that future program types which allow for such write must provide a prologue callback. In case of XDP and sk_msg it's noop, thus add a generic noop handler there. The latter starts out with NULL data/data_end unconditionally when sg pages are shared. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- net/core/filter.c | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fc9a658af0e1..171a2c88e77dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5709,7 +5709,11 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bool is_narrower_load; u32 target_size; - if (ops->gen_prologue) { + if (ops->gen_prologue || env->seen_direct_write) { + if (!ops->gen_prologue) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= ARRAY_SIZE(insn_buf)) { diff --git a/net/core/filter.c b/net/core/filter.c index 3fdddfa9a0fd9..cd648d09a8e5b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5644,6 +5644,15 @@ static bool sock_filter_is_valid_access(int off, int size, prog->expected_attach_type); } +static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + /* Neither direct read nor direct write requires any preliminary + * action. + */ + return 0; +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -7210,6 +7219,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops xdp_prog_ops = { @@ -7308,6 +7318,7 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = { .get_func_proto = sk_msg_func_proto, .is_valid_access = sk_msg_is_valid_access, .convert_ctx_access = sk_msg_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops sk_msg_prog_ops = {