From a30295c454725b293c7b2f45f6e60fcce0bc435e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:08 -0700
Subject: [PATCH 01/10] tls: rx: consistently use unlocked accessors for rx_list

rx_list is protected by the socket lock, no need to take the built-in
spin lock on accesses.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 17c4e236ec8b3..461734a27297e 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1709,7 +1709,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 		next_skb = skb_peek_next(skb, &ctx->rx_list);

 		if (!is_peek) {
-			skb_unlink(skb, &ctx->rx_list);
+			__skb_unlink(skb, &ctx->rx_list);
 			consume_skb(skb);
 		}

@@ -1824,7 +1824,7 @@ int tls_sw_recvmsg(struct sock *sk,
 		ctx->recv_pkt = NULL;
 		__strp_unpause(&ctx->strp);
-		skb_queue_tail(&ctx->rx_list, skb);
+		__skb_queue_tail(&ctx->rx_list, skb);

 		if (async) {
 			/* TLS 1.2-only, to_decrypt must be text length */
@@ -1845,7 +1845,7 @@ int tls_sw_recvmsg(struct sock *sk,
 			if (err != __SK_PASS) {
 				rxm->offset = rxm->offset + rxm->full_len;
 				rxm->full_len = 0;
-				skb_unlink(skb, &ctx->rx_list);
+				__skb_unlink(skb, &ctx->rx_list);
 				if (err == __SK_DROP)
 					consume_skb(skb);
 				continue;
@@ -1873,7 +1873,7 @@ int tls_sw_recvmsg(struct sock *sk,
 			decrypted += chunk;
 			len -= chunk;
-			skb_unlink(skb, &ctx->rx_list);
+			__skb_unlink(skb, &ctx->rx_list);
 			consume_skb(skb);

 			/* Return full control message to userspace before trying
@@ -2173,7 +2173,7 @@ void tls_sw_release_resources_rx(struct sock *sk)
 	if (ctx->aead_recv) {
 		kfree_skb(ctx->recv_pkt);
 		ctx->recv_pkt = NULL;
-		skb_queue_purge(&ctx->rx_list);
+		__skb_queue_purge(&ctx->rx_list);
 		crypto_free_aead(ctx->aead_recv);
 		strp_stop(&ctx->strp);
 		/* If tls_sw_strparser_arm() was not called (cleanup paths)
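A note on the accessor naming the patch above relies on: the plain skb_queue helpers take the queue's built-in spin lock, while the double-underscore variants leave serialization entirely to the caller. The userspace sketch below (pthread mutexes standing in for kernel locks, every name invented for the illustration, not kernel code) shows why the per-operation lock is pure overhead once an outer lock, here playing the socket lock's role, already covers every access:

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int val; };

struct queue {
	pthread_mutex_t lock;	/* built-in lock, like sk_buff_head.lock */
	struct node *head;
};

/* "Locked" accessor: safe to call with no external serialization. */
static void queue_push(struct queue *q, struct node *n)
{
	pthread_mutex_lock(&q->lock);
	n->next = q->head;
	q->head = n;
	pthread_mutex_unlock(&q->lock);
}

/* "Unlocked" accessor: the caller must already hold whatever lock
 * protects the queue, so taking q->lock again would only add overhead.
 */
static void __queue_push(struct queue *q, struct node *n)
{
	n->next = q->head;
	q->head = n;
}

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;	/* stands in for the socket lock */
static struct queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };

int main(void)
{
	struct node a = { .val = 1 }, b = { .val = 2 };

	queue_push(&q, &a);		/* no outer lock held: use the locked variant */

	pthread_mutex_lock(&outer);
	__queue_push(&q, &b);		/* outer lock already serializes access */
	pthread_mutex_unlock(&outer);

	printf("head val: %d\n", q.head->val);
	return 0;
}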
From 0775639ce1ca953503121e350d6b885366f56a52 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:09 -0700
Subject: [PATCH 02/10] tls: rx: reuse leave_on_list label for psock

The code is identical, we can save a few LoC.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 461734a27297e..fd19047fa6c6c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1775,14 +1775,10 @@ int tls_sw_recvmsg(struct sock *sk,
 		skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
 		if (!skb) {
 			if (psock) {
-				int ret = sk_msg_recvmsg(sk, psock, msg, len,
-							 flags);
-
-				if (ret > 0) {
-					decrypted += ret;
-					len -= ret;
-					continue;
-				}
+				chunk = sk_msg_recvmsg(sk, psock, msg, len,
+						       flags);
+				if (chunk > 0)
+					goto leave_on_list;
 			}
 			goto recv_end;
 		}

From 284b4d93daee56dff3e10029ddf2e03227f50dbf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:10 -0700
Subject: [PATCH 03/10] tls: rx: move counting TlsDecryptErrors for sync

Move counting TlsDecryptErrors to tls_do_decryption() where differences
between sync and async crypto are reconciled.

No functional changes, this code just always gave me a pause.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fd19047fa6c6c..fd970649d0045 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -270,6 +270,8 @@ static int tls_do_decryption(struct sock *sk,
 		ret = crypto_wait_req(ret, &ctx->async_wait);
 	}

+	if (ret == -EBADMSG)
+		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
 	if (async)
 		atomic_dec(&ctx->decrypt_pending);
@@ -1584,8 +1586,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 	if (err < 0) {
 		if (err == -EINPROGRESS)
 			tls_advance_record_sn(sk, prot, &tls_ctx->rx);
-		else if (err == -EBADMSG)
-			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);

 		return err;
 	}

From 72f3ad73bc866a53340cce6a0ad500d29295a8b8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:11 -0700
Subject: [PATCH 04/10] tls: rx: don't handle TLS 1.3 in the async crypto callback

Async crypto never worked with TLS 1.3 and was explicitly disabled in
commit 8497ded2d16c ("net/tls: Disable async decrytion for tls1.3").
There's no need for us to handle TLS 1.3 padding in the async cb.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fd970649d0045..d0b7078ca3ec9 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -188,17 +188,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 		tls_err_abort(skb->sk, err);
 	} else {
 		struct strp_msg *rxm = strp_msg(skb);
-		int pad;

-		pad = padding_length(prot, skb);
-		if (pad < 0) {
-			ctx->async_wait.err = pad;
-			tls_err_abort(skb->sk, pad);
-		} else {
-			rxm->full_len -= pad;
-			rxm->offset += prot->prepend_size;
-			rxm->full_len -= prot->overhead_size;
-		}
+		/* No TLS 1.3 support with async crypto */
+		WARN_ON(prot->tail_size);
+
+		rxm->offset += prot->prepend_size;
+		rxm->full_len -= prot->overhead_size;
 	}

 	/* After using skb->sk to propagate sk through crypto async callback
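Patch 04 above can drop the padding handling in the async callback because only TLS 1.3 records carry a non-zero tail: trailing zero padding plus a one-byte inner content type. As a rough standalone sketch of that record layout from RFC 8446 — an illustration, not the kernel's padding_length() implementation — stripping the trailer looks like this:

#include <stddef.h>
#include <stdio.h>

/* Return the number of trailing bytes to strip from a decrypted TLS 1.3
 * record (zero padding plus the one-byte inner content type), or -1 if
 * the record is all zeros and therefore malformed.
 */
static int tls13_trailer_len(const unsigned char *plaintext, size_t len,
			     unsigned char *type)
{
	size_t i = len;

	while (i > 0 && plaintext[i - 1] == 0)
		i--;			/* skip the zero padding */
	if (i == 0)
		return -1;		/* no content type found */
	*type = plaintext[i - 1];
	return (int)(len - i + 1);	/* padding bytes plus the type octet */
}

int main(void)
{
	/* "hi" + content type 0x17 (application_data) + three padding bytes */
	unsigned char rec[] = { 'h', 'i', 0x17, 0, 0, 0 };
	unsigned char type;
	int trail = tls13_trailer_len(rec, sizeof(rec), &type);

	printf("trailer=%d type=0x%02x payload=%zu bytes\n",
	       trail, type, sizeof(rec) - trail);
	return 0;
}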
From 1c699ffa48a15710746989c36a82cbfb07e8d17f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:12 -0700
Subject: [PATCH 05/10] tls: rx: assume crypto always calls our callback

If crypto didn't always invoke our callback for async we'd not be
clearing skb->sk and would crash in the skb core when freeing it.
This if must be dead code.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d0b7078ca3ec9..fd1a7ccc22bb7 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -268,9 +268,6 @@ static int tls_do_decryption(struct sock *sk,
 	if (ret == -EBADMSG)
 		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
-	if (async)
-		atomic_dec(&ctx->decrypt_pending);
-
 	return ret;
 }

From 4dcdd971b9c7a5c38f65d81f7c548fea2e337373 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:13 -0700
Subject: [PATCH 06/10] tls: rx: treat process_rx_list() errors as transient

process_rx_list() only fails if it can't copy data to user space.
There is no point recording the error onto sk->sk_err or giving up on
the data which was read partially. Treat the return value like a normal
socket partial read.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fd1a7ccc22bb7..c1ba64bfe228d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1650,7 +1650,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 			err = tls_record_content_type(msg, tlm, control);
 			if (err <= 0)
-				return err;
+				goto out;

 			if (skip < rxm->full_len)
 				break;
@@ -1668,13 +1668,13 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 		err = tls_record_content_type(msg, tlm, control);
 		if (err <= 0)
-			return err;
+			goto out;

 		if (!zc || (rxm->full_len - skip) > len) {
 			err = skb_copy_datagram_msg(skb, rxm->offset + skip,
 						    msg, chunk);
 			if (err < 0)
-				return err;
+				goto out;
 		}

 		len = len - chunk;
@@ -1707,8 +1707,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 		skb = next_skb;
 	}
+	err = 0;

-	return copied;
+out:
+	return copied ? : err;
 }

 int tls_sw_recvmsg(struct sock *sk,
@@ -1744,10 +1746,8 @@ int tls_sw_recvmsg(struct sock *sk,
 	/* Process pending decrypted records. It must be non-zero-copy */
 	err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek);
-	if (err < 0) {
-		tls_err_abort(sk, err);
+	if (err < 0)
 		goto end;
-	}

 	copied = err;
 	if (len <= copied)
 		goto end;
@@ -1899,11 +1899,7 @@ int tls_sw_recvmsg(struct sock *sk,
 		else
 			err = process_rx_list(ctx, msg, &control, 0,
 					      decrypted, true, is_peek);
-		if (err < 0) {
-			tls_err_abort(sk, err);
-			copied = 0;
-			goto end;
-		}
+		decrypted = max(err, 0);
 	}

 	copied += decrypted;
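The "return copied ? : err;" at the end of patch 06 above is the usual socket convention: if any data was handed to user space, report that, and let the error (if it persists) surface on the next call. A minimal standalone sketch of the convention, with invented helper names and -EFAULT standing in for a failed copy to user space:

#include <errno.h>
#include <stdio.h>

static int copy_chunk(int fail)		/* stand-in for skb_copy_datagram_msg() */
{
	return fail ? -EFAULT : 100;	/* 100 bytes copied, or an error */
}

static int recv_like(int chunks_before_failure)
{
	int copied = 0, err = 0, i;

	for (i = 0; i < 3; i++) {
		int ret = copy_chunk(i >= chunks_before_failure);

		if (ret < 0) {
			err = ret;
			break;
		}
		copied += ret;
	}
	/* If anything was copied, report that; the error is only returned
	 * when no progress was made at all.
	 */
	return copied ? copied : err;
}

int main(void)
{
	printf("partial read: %d\n", recv_like(2));	/* 200 */
	printf("hard error:   %d\n", recv_like(0));	/* -EFAULT */
	return 0;
}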
From f314bfee81b1bf8e01168177b2f65f24eb8da63a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:14 -0700
Subject: [PATCH 07/10] tls: rx: return the already-copied data on crypto error

async crypto handler will report the socket error, no need to report
it again. We can, however, let the data we already copied be reported
to user space but we need to make sure the error will be reported next
time around.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c1ba64bfe228d..73c31f38dfe93 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1744,6 +1744,11 @@ int tls_sw_recvmsg(struct sock *sk,
 	lock_sock(sk);
 	bpf_strp_enabled = sk_psock_strp_enabled(psock);

+	/* If crypto failed the connection is broken */
+	err = ctx->async_wait.err;
+	if (err)
+		goto end;
+
 	/* Process pending decrypted records. It must be non-zero-copy */
 	err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek);
 	if (err < 0)
@@ -1874,7 +1879,7 @@ int tls_sw_recvmsg(struct sock *sk,
 recv_end:
 	if (async) {
-		int pending;
+		int ret, pending;

 		/* Wait for all previously submitted records to be decrypted */
 		spin_lock_bh(&ctx->decrypt_compl_lock);
 		reinit_completion(&ctx->async_wait.completion);
 		pending = atomic_read(&ctx->decrypt_pending);
 		spin_unlock_bh(&ctx->decrypt_compl_lock);
 		if (pending) {
-			err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
-			if (err) {
-				/* one of async decrypt failed */
-				tls_err_abort(sk, err);
-				copied = 0;
+			ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+			if (ret) {
+				if (err >= 0 || err == -EINPROGRESS)
+					err = ret;
 				decrypted = 0;
 				goto end;
 			}
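Patch 07 above splits the responsibilities: the async callback already marks the socket broken, so recvmsg() only has to keep the failure around (ctx->async_wait.err), return whatever was copied so far, and fail fast at the top of the next call. A rough standalone sketch of that behaviour, with all names invented for the illustration:

#include <errno.h>
#include <stdio.h>

struct rx_ctx {
	int async_err;			/* stands in for ctx->async_wait.err */
};

static int recv_once(struct rx_ctx *ctx, int copied_so_far, int async_failure)
{
	/* If crypto failed earlier the connection is broken: fail up front. */
	if (ctx->async_err)
		return ctx->async_err;

	if (async_failure) {
		ctx->async_err = -EBADMSG;	/* remember it for the next call */
		if (copied_so_far)
			return copied_so_far;	/* still hand over what we copied */
		return ctx->async_err;
	}
	return copied_so_far;
}

int main(void)
{
	struct rx_ctx ctx = { 0 };

	printf("first call:  %d\n", recv_once(&ctx, 300, 1));	/* 300 */
	printf("second call: %d\n", recv_once(&ctx, 0, 0));	/* -EBADMSG */
	return 0;
}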
From 3547a1f9d988d88ecff4fc365d2773037c849f49 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:15 -0700
Subject: [PATCH 08/10] tls: rx: use async as an in-out argument

Propagating EINPROGRESS thru multiple layers of functions is error
prone. Use darg->async as an in/out argument, like we use darg->zc
today. On input it tells the code if async is allowed, on output
if it took place.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 73c31f38dfe93..2f44f57f216a3 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -227,7 +227,7 @@ static int tls_do_decryption(struct sock *sk,
 			     char *iv_recv,
 			     size_t data_len,
 			     struct aead_request *aead_req,
-			     bool async)
+			     struct tls_decrypt_arg *darg)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -240,7 +240,7 @@ static int tls_do_decryption(struct sock *sk,
 			       data_len + prot->tag_size,
 			       (u8 *)iv_recv);

-	if (async) {
+	if (darg->async) {
 		/* Using skb->sk to push sk through to crypto async callback
 		 * handler. This allows propagating errors up to the socket
 		 * if needed. It _must_ be cleared in the async handler
@@ -260,11 +260,13 @@ static int tls_do_decryption(struct sock *sk,
 	ret = crypto_aead_decrypt(aead_req);
 	if (ret == -EINPROGRESS) {
-		if (async)
-			return ret;
+		if (darg->async)
+			return 0;

 		ret = crypto_wait_req(ret, &ctx->async_wait);
 	}
+	darg->async = false;
+
 	if (ret == -EBADMSG)
 		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
@@ -1536,9 +1538,9 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	/* Prepare and submit AEAD request */
 	err = tls_do_decryption(sk, skb, sgin, sgout, iv,
-				data_len, aead_req, darg->async);
-	if (err == -EINPROGRESS)
-		return err;
+				data_len, aead_req, darg);
+	if (darg->async)
+		return 0;

 	/* Release the pages in case iov was mapped to pages */
 	for (; pages > 0; pages--)
@@ -1575,11 +1577,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 	}

 	err = decrypt_internal(sk, skb, dest, NULL, darg);
-	if (err < 0) {
-		if (err == -EINPROGRESS)
-			tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+	if (err < 0)
 		return err;
-	}
+	if (darg->async)
+		goto decrypt_next;

 decrypt_done:
 	pad = padding_length(prot, skb);
@@ -1589,8 +1590,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 	rxm->full_len -= pad;
 	rxm->offset += prot->prepend_size;
 	rxm->full_len -= prot->overhead_size;
-	tls_advance_record_sn(sk, prot, &tls_ctx->rx);
 	tlm->decrypted = 1;
+decrypt_next:
+	tls_advance_record_sn(sk, prot, &tls_ctx->rx);

 	return 0;
 }
@@ -1796,13 +1798,12 @@ int tls_sw_recvmsg(struct sock *sk,
 		darg.async = false;

 		err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg);
-		if (err < 0 && err != -EINPROGRESS) {
+		if (err < 0) {
 			tls_err_abort(sk, -EBADMSG);
 			goto recv_end;
 		}

-		if (err == -EINPROGRESS)
-			async = true;
+		async |= darg.async;

 		/* If the type of records being processed is not known yet,
 		 * set it to record type just dequeued. If it is already known,
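Patch 08 above replaces the magic -EINPROGRESS return value with an in/out flag: on input darg.async says whether async completion is allowed, on output it says whether completion was actually deferred. A minimal sketch of that calling convention, using illustrative names rather than the kernel's types:

#include <stdbool.h>
#include <stdio.h>

struct decrypt_arg {
	bool zc;	/* in/out: zero-copy requested / performed */
	bool async;	/* in/out: async allowed / async actually happening */
};

static int do_decrypt(struct decrypt_arg *darg, bool backend_is_async)
{
	if (darg->async && backend_is_async)
		return 0;	/* result will arrive via a callback */

	darg->async = false;	/* completed synchronously after all */
	return 0;		/* 0 on success, negative errno on failure */
}

int main(void)
{
	struct decrypt_arg darg = { .async = true };

	if (do_decrypt(&darg, false) == 0 && !darg.async)
		puts("completed synchronously, result ready now");

	darg.async = true;
	if (do_decrypt(&darg, true) == 0 && darg.async)
		puts("completion deferred to the async callback");
	return 0;
}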
From f7d45f4b52fe259c152139f1f6b2f80474b7b96f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:16 -0700
Subject: [PATCH 09/10] tls: rx: use MAX_IV_SIZE for allocations

IVs are 8 or 16 bytes, no point reading out the exact value for
quantities this small.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 2f44f57f216a3..465d902f5bb9b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1452,7 +1452,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
 	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
 	mem_size = mem_size + prot->aad_size;
-	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
+	mem_size = mem_size + MAX_IV_SIZE;

 	/* Allocate a single block of memory which contains
 	 * aead_req || sgin[] || sgout[] || aad || iv.

From a4ae58cdb6e8ed6b00428f65515d5948e1b56deb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 11 Apr 2022 12:19:17 -0700
Subject: [PATCH 10/10] tls: rx: only copy IV from the packet for TLS 1.2

TLS 1.3 and ChaChaPoly don't carry IV in the packet.
The code before this change would copy out iv_size worth of whatever
followed the TLS header in the packet and then for TLS 1.3 | ChaCha
overwrite that with the sequence number. Waste of cycles especially
with TLS 1.2 being close to dead and TLS 1.3 being the common case.

Signed-off-by: Jakub Kicinski
Signed-off-by: David S. Miller
---
 net/tls/tls_sw.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 465d902f5bb9b..ddbe05ec5489d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1482,20 +1482,20 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	}

 	/* Prepare IV */
-	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
-			    iv + iv_offset + prot->salt_size,
-			    prot->iv_size);
-	if (err < 0) {
-		kfree(mem);
-		return err;
-	}
 	if (prot->version == TLS_1_3_VERSION ||
-	    prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
+	    prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
 		memcpy(iv + iv_offset, tls_ctx->rx.iv,
 		       prot->iv_size + prot->salt_size);
-	else
+	} else {
+		err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+				    iv + iv_offset + prot->salt_size,
+				    prot->iv_size);
+		if (err < 0) {
+			kfree(mem);
+			return err;
+		}
 		memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
-
+	}
 	xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);

 	/* Prepare AAD */
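For context on what patch 10 avoids copying: TLS 1.3 and ChaCha20-Poly1305 derive the whole per-record nonce from state agreed at the handshake (static IV XOR left-padded record sequence number, RFC 8446), while TLS 1.2 AES-GCM carries an explicit 8-byte IV on the wire right after the record header (RFC 5288). A standalone sketch of the two constructions, illustrative only and not the kernel's data structures:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* TLS 1.3 / ChaChaPoly: nothing is read from the record itself. */
static void tls13_nonce(uint8_t nonce[12], const uint8_t iv[12], uint64_t seq)
{
	int i;

	memcpy(nonce, iv, 12);
	for (i = 0; i < 8; i++)			/* XOR big-endian seq into the low 8 bytes */
		nonce[4 + i] ^= (uint8_t)(seq >> (56 - 8 * i));
}

/* TLS 1.2 AES-GCM: implicit 4-byte salt plus explicit IV from the wire. */
static void tls12_gcm_nonce(uint8_t nonce[12], const uint8_t salt[4],
			    const uint8_t *explicit_iv_on_wire)
{
	memcpy(nonce, salt, 4);
	memcpy(nonce + 4, explicit_iv_on_wire, 8);
}

int main(void)
{
	uint8_t iv12[12] = { 0 }, salt[4] = { 0 }, wire[8] = { 0 }, nonce[12];

	tls13_nonce(nonce, iv12, 1);
	printf("tls1.3 nonce last byte: %02x\n", nonce[11]);	/* 01 for seq 1 */
	tls12_gcm_nonce(nonce, salt, wire);
	printf("tls1.2 nonce built from salt + explicit IV\n");
	return 0;
}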