From 16b2f264983dc264c1560cc0170e760dec1bf54f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:23 -0800 Subject: [PATCH 1/5] bpf: sockmap, fix proto update hook to avoid dup calls When sockets are added to a sockmap or sockhash we allocate and init a psock. Then update the proto ops with sock_map_init_proto the flow is sock_hash_update_common sock_map_link psock = sock_map_psock_get_checked() <-returns existing psock sock_map_init_proto(sk, psock) <- updates sk_proto If the socket is already in a map this results in the sock_map_init_proto being called multiple times on the same socket. We do this because when a socket is added to multiple maps this might result in a new set of BPF programs being attached to the socket requiring an updated ops struct. This creates a rule where it must be safe to call psock_update_sk_prot multiple times. When we added a fix for UAF through unix sockets in patch 4dd9a38a753fc we broke this rule by adding a sock_hold in that path to ensure the sock is not released. The result is if a af_unix stream sock is placed in multiple maps it results in a memory leak because we call sock_hold multiple times with only a single sock_put on it. Fixes: 8866730aed51 ("bpf, sockmap: af_unix stream sockets need to hold ref for pair sock") Reported-by: Xingwei Lee Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-2-john.fastabend@gmail.com --- net/unix/unix_bpf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7ea7c3a0d0d06..bd84785bf8d6c 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -161,15 +161,30 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { struct sock *sk_pair; + /* Restore does not decrement the sk_pair reference yet because we must + * keep the a reference to the socket until after an RCU grace period + * and any pending sends have completed. + */ if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } - sk_pair = unix_peer(sk); - sock_hold(sk_pair); - psock->sk_pair = sk_pair; + /* psock_update_sk_prot can be called multiple times if psock is + * added to multiple maps and/or slots in the same map. There is + * also an edge case where replacing a psock with itself can trigger + * an extra psock_update_sk_prot during the insert process. So it + * must be safe to do multiple calls. Here we need to ensure we don't + * increment the refcnt through sock_hold many times. There will only + * be a single matching destroy operation. + */ + if (!psock->sk_pair) { + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; + } + unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; From 7865dfb1eb941ddd25802a9e13b6ff5f3f4dc02f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:24 -0800 Subject: [PATCH 2/5] bpf: sockmap, added comments describing update proto rules Add a comment describing that the psock update proto callbback can be called multiple times and this must be safe. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-3-john.fastabend@gmail.com --- include/linux/skmsg.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c953b8c0d2f43..888a4b217829f 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -100,6 +100,11 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); + /* psock_update_sk_prot may be called with restore=false many times + * so the handler must be safe for this case. It will be called + * exactly once with restore=true when the psock is being destroyed + * and psock refcnt is zero, but before an RCU grace period. + */ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); struct proto *sk_proto; From 8c1b382a555adcd2008ae964047a35b739dfaf24 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:25 -0800 Subject: [PATCH 3/5] bpf: sockmap, add tests for proto updates many to single map Add test with a single map where each socket is inserted multiple times. Test protocols: TCP, UDP, stream af_unix and dgram af_unix. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-4-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 7c2241fae19a6..ebd05788cfa09 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -555,6 +555,74 @@ static void test_sockmap_unconnected_unix(void) close(dgram); } +static void test_sockmap_many_socket(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map, &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -597,7 +665,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); - if (test__start_subtest("sockmap unconnected af_unix")) test_sockmap_unconnected_unix(); + if (test__start_subtest("sockmap one socket to many map entries")) + test_sockmap_many_socket(); } From f1300467dd9f67293a7aae86fd26471520fac36d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:26 -0800 Subject: [PATCH 4/5] bpf: sockmap, add tests for proto updates single socket to many map Add test with multiple maps where each socket is inserted in multiple maps. Test protocols: TCP, UDP, stream af_unix and dgram af_unix. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-5-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index ebd05788cfa09..74bca07ebec20 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -623,6 +623,78 @@ static void test_sockmap_many_socket(void) test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_many_maps(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map[2], entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map[0] = bpf_map__fd(skel->maps.sock_map_rx); + map[1] = bpf_map__fd(skel->maps.sock_map_tx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map[1], &entry); + entry--; + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + err = bpf_map_delete_elem(map[0], &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -669,4 +741,6 @@ void test_sockmap_basic(void) test_sockmap_unconnected_unix(); if (test__start_subtest("sockmap one socket to many map entries")) test_sockmap_many_socket(); + if (test__start_subtest("sockmap one socket to many maps")) + test_sockmap_many_maps(); } From bdbca46d3f84a4455cd5c15a7483666218851549 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:27 -0800 Subject: [PATCH 5/5] bpf: sockmap, add tests for proto updates replace socket Add test that replaces the same socket with itself. This exercises a corner case where old element and new element have the same posck. Test protocols: TCP, UDP, stream af_unix and dgram af_unix. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-6-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 74bca07ebec20..77e26ecffa9d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -695,6 +695,73 @@ static void test_sockmap_many_maps(void) test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_same_sock(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + + err = bpf_map_delete_elem(map, &zero); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -743,4 +810,6 @@ void test_sockmap_basic(void) test_sockmap_many_socket(); if (test__start_subtest("sockmap one socket to many maps")) test_sockmap_many_maps(); + if (test__start_subtest("sockmap same socket replace")) + test_sockmap_same_sock(); }