From c57ca512f3b68ddcd62bda9cc24a8f5584ab01b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:18 -0800 Subject: [PATCH 1/7] net: tls: factor out tls_*crypt_async_wait() Factor out waiting for async encrypt and decrypt to finish. There are already multiple copies and a subsequent fix will need more. No functional changes. Note that crypto_wait_req() returns wait->err Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 96 +++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 31e8a94dfc111..6a73714f34cc4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -230,6 +230,20 @@ static void tls_decrypt_done(void *data, int err) spin_unlock_bh(&ctx->decrypt_compl_lock); } +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->decrypt_compl_lock); + reinit_completion(&ctx->async_wait.completion); + pending = atomic_read(&ctx->decrypt_pending); + spin_unlock_bh(&ctx->decrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + + return ctx->async_wait.err; +} + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -495,6 +509,28 @@ static void tls_encrypt_done(void *data, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no + * pending encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -984,7 +1020,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, int num_zc = 0; int orig_size; int ret = 0; - int pending; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; @@ -1163,24 +1198,12 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1229,7 +1252,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1264,22 +1286,7 @@ void tls_sw_splice_eof(struct socket *sock) } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -2109,16 +2116,10 @@ int tls_sw_recvmsg(struct sock *sk, recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2435,16 +2436,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); From aec7961916f3f9e88766e2688992da6980f11b8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: [PATCH 2/7] tls: fix race between async notify and socket close The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 5 ----- net/tls/tls_sw.c | 43 ++++++++++--------------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 962f0c501111b..340ad43971e47 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,9 +97,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -136,8 +133,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6a73714f34cc4..635305bebfef6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -224,22 +224,15 @@ static void tls_decrypt_done(void *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { - int pending; - - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); return ctx->async_wait.err; } @@ -267,6 +260,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -455,7 +449,6 @@ static void tls_encrypt_done(void *data, int err) struct sk_msg *msg_en; bool ready = false; struct sock *sk; - int pending; msg_en = &rec->msg_encrypted; @@ -494,12 +487,8 @@ static void tls_encrypt_done(void *data, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -511,22 +500,9 @@ static void tls_encrypt_done(void *data, int err) static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { - int pending; - - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } @@ -577,6 +553,7 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); @@ -2601,7 +2578,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc } crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; @@ -2622,7 +2599,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) } crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); From e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:20 -0800 Subject: [PATCH 3/7] tls: fix race between tx work scheduling and socket close Similarly to previous commit, the submitting thread (recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete(). Reorder scheduling the work before calling complete(). This seems more logical in the first place, as it's the inverse order of what the submitting thread will do. Reported-by: valis Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 635305bebfef6..9374a61cef00b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -447,7 +447,6 @@ static void tls_encrypt_done(void *data, int err) struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; - bool ready = false; struct sock *sk; msg_en = &rec->msg_encrypted; @@ -483,19 +482,16 @@ static void tls_encrypt_done(void *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - - if (!ready) - return; - - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) From 8590541473188741055d27b955db0777569438e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:21 -0800 Subject: [PATCH 4/7] net: tls: handle backlogging of crypto requests Since we're setting the CRYPTO_TFM_REQ_MAY_BACKLOG flag on our requests to the crypto API, crypto_aead_{encrypt,decrypt} can return -EBUSY instead of -EINPROGRESS in valid situations. For example, when the cryptd queue for AESNI is full (easy to trigger with an artificially low cryptd.cryptd_max_cpu_qlen), requests will be enqueued to the backlog but still processed. In that case, the async callback will also be called twice: first with err == -EINPROGRESS, which it seems we can just ignore, then with err == 0. Compared to Sabrina's original patch this version uses the new tls_*crypt_async_wait() helpers and converts the EBUSY to EINPROGRESS to avoid having to modify all the error handling paths. The handling is identical. Fixes: a54667f6728c ("tls: Add support for encryption using async offload accelerator") Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/netdev/9681d1febfec295449a62300938ed2ae66983f28.1694018970.git.sd@queasysnail.net/ Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9374a61cef00b..63bef5666e36d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -196,6 +196,17 @@ static void tls_decrypt_done(void *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -269,6 +280,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -449,6 +464,9 @@ static void tls_encrypt_done(void *data, int err) struct sk_msg *msg_en; struct sock *sk; + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; + msg_en = &rec->msg_encrypted; sk = rec->sk; @@ -553,6 +571,10 @@ static int tls_do_encryption(struct sock *sk, atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; From 32b55c5ff9103b8508c1e04bfa5a08c64e7a925f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 6 Feb 2024 17:18:22 -0800 Subject: [PATCH 5/7] net: tls: fix use-after-free with partial reads and async decrypt tls_decrypt_sg doesn't take a reference on the pages from clear_skb, so the put_page() in tls_decrypt_done releases them, and we trigger a use-after-free in process_rx_list when we try to read from the partially-read skb. Fixes: fd31f3996af2 ("tls: rx: decrypt into a fresh skb") Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 63bef5666e36d..a6eff21ade23e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -63,6 +63,7 @@ struct tls_decrypt_ctx { u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -187,7 +188,6 @@ static void tls_decrypt_done(void *data, int err) struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -224,7 +224,7 @@ static void tls_decrypt_done(void *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -1583,6 +1583,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, From 49d821064c44cb5ffdf272905236012ea9ce50e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:23 -0800 Subject: [PATCH 6/7] selftests: tls: use exact comparison in recv_partial This exact case was fail for async crypto and we weren't catching it. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 7799e042a9719..bc36c91c4480f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1002,12 +1002,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } From ac437a51ce662364062f704e321227f6728e6adc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:24 -0800 Subject: [PATCH 7/7] net: tls: fix returned read length with async decrypt We double count async, non-zc rx data. The previous fix was lucky because if we fully zc async_copy_bytes is 0 so we add 0. Decrypted already has all the bytes we handled, in all cases. We don't have to adjust anything, delete the erroneous line. Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a6eff21ade23e..9fbc70200cd0f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2132,7 +2132,6 @@ int tls_sw_recvmsg(struct sock *sk, else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted += max(err, 0); } copied += decrypted;