From 7608a971fdeb4c3eefa522d1bfe8d4bc6b2481cc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:45 +0100 Subject: [PATCH 1/4] tls: recv: process_rx_list shouldn't use an offset with kvec Only MSG_PEEK needs to copy from an offset during the final process_rx_list call, because the bytes we copied at the beginning of tls_sw_recvmsg were left on the rx_list. In the KVEC case, we removed data from the rx_list as we were copying it, so there's no need to use an offset, just like in the normal case. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e5487514f828e0347d2b92ca40002c62b58af73d.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 211f57164cb61..3cdc6bc9fba69 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2152,7 +2152,7 @@ int tls_sw_recvmsg(struct sock *sk, } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else From 85eef9a41d019b59be7bc91793f26251909c0710 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:46 +0100 Subject: [PATCH 2/4] tls: adjust recv return with async crypto and failed copy to userspace process_rx_list may not copy as many bytes as we want to the userspace buffer, for example in case we hit an EFAULT during the copy. If this happens, we should only count the bytes that were actually copied, which may be 0. Subtracting async_copy_bytes is correct in both peek and !peek cases, because decrypted == async_copy_bytes + peeked for the peek case: peek is always !ZC, and we can go through either the sync or async path. In the async case, we add chunk to both decrypted and async_copy_bytes. In the sync case, we add chunk to both decrypted and peeked. I missed that in commit 6caaf104423d ("tls: fix peeking with sync+async decryption"). Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1b5a1eaab3c088a9dd5d9f1059ceecd7afe888d1.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3cdc6bc9fba69..14faf6189eb14 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2158,6 +2158,9 @@ int tls_sw_recvmsg(struct sock *sk, else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; From dc54b813df63020e946ccdef35b64d4fa99fd622 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:47 +0100 Subject: [PATCH 3/4] selftests: tls: add test with a partially invalid iov Make sure that we don't return more bytes than we actually received if the userspace buffer was bogus. We expect to receive at least the rest of rec1, and possibly some of rec2 (currently, we don't, but that would be ok). Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/720e61b3d3eab40af198a58ce2cd1ee019f0ceb1.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index c6eda21cefb6b..f27a12d2a2c99 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt) EXPECT_EQ(errno, EINVAL); } +TEST_F(tls, recv_efault) +{ + char *rec1 = "1111111111"; + char *rec2 = "2222222222"; + struct msghdr hdr = {}; + struct iovec iov[2]; + char recv_mem[12]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, 10, 0), 10); + EXPECT_EQ(send(self->fd, rec2, 10, 0), 10); + + iov[0].iov_base = recv_mem; + iov[0].iov_len = sizeof(recv_mem); + iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */ + iov[1].iov_len = 1; + + hdr.msg_iovlen = 2; + hdr.msg_iov = iov; + + EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1); + EXPECT_EQ(recv_mem[0], rec1[0]); + + ret = recvmsg(self->cfd, &hdr, 0); + EXPECT_LE(ret, sizeof(recv_mem)); + EXPECT_GE(ret, 9); + EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0); + if (ret > 9) + EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); +} + FIXTURE(tls_err) { int fd, cfd; From 417e91e856099e9b8a42a2520e2255e6afe024be Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:48 +0100 Subject: [PATCH 4/4] tls: get psock ref after taking rxlock to avoid leak At the start of tls_sw_recvmsg, we take a reference on the psock, and then call tls_rx_reader_lock. If that fails, we return directly without releasing the reference. Instead of adding a new label, just take the reference after locking has succeeded, since we don't need it before. Fixes: 4cbc325ed6b4 ("tls: rx: allow only one reader at a time") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/fe2ade22d030051ce4c3638704ed58b67d0df643.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 14faf6189eb14..b783231668c65 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */