From b5941f066b4ca331db225a976dae1d6ca8cf0ae3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:37 -0700 Subject: [PATCH 1/4] mptcp: fix sk_forward_memory corruption on retransmission MPTCP sk_forward_memory handling is a bit special, as such field is protected by the msk socket spin_lock, instead of the plain socket lock. Currently we have a code path updating such field without handling the relevant lock: __mptcp_retrans() -> __mptcp_clean_una_wakeup() Several helpers in __mptcp_clean_una_wakeup() will update sk_forward_alloc, possibly causing such field corruption, as reported by Matthieu. Address the issue providing and using a new variant of blamed function which explicitly acquires the msk spin lock. Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/172 Reported-by: Matthieu Baerts Tested-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2bc199549a887..5edc686faff15 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif + if (!msk->wmem_reserved) return; @@ -1085,10 +1089,20 @@ static void __mptcp_clean_una(struct sock *sk) static void __mptcp_clean_una_wakeup(struct sock *sk) { +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif __mptcp_clean_una(sk); mptcp_write_space(sk); } +static void mptcp_clean_una_wakeup(struct sock *sk) +{ + mptcp_data_lock(sk); + __mptcp_clean_una_wakeup(sk); + mptcp_data_unlock(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk) struct sock *ssk; int ret; - __mptcp_clean_una_wakeup(sk); + mptcp_clean_una_wakeup(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { From 06f9a435b3aa12f4de6da91f11fdce8ce7b46205 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:38 -0700 Subject: [PATCH 2/4] mptcp: always parse mptcp options for MPC reqsk In subflow_syn_recv_sock() we currently skip options parsing for OoO packet, given that such packets may not carry the relevant MPC option. If the peer generates an MPC+data TSO packet and some of the early segments are lost or get reorder, we server will ignore the peer key, causing transient, unexpected fallback to TCP. The solution is always parsing the incoming MPTCP options, and do the fallback only for in-order packets. This actually cleans the existing code a bit. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Reported-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bde6be77ea73b..c6ee811498290 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* if the sk is MP_CAPABLE, we try to fetch the client key */ if (subflow_req->mp_capable) { - if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { - /* here we can receive and accept an in-window, - * out-of-order pkt, which will not carry the MP_CAPABLE - * opt even on mptcp enabled paths - */ - goto create_msk; - } - + /* we can receive and accept an in-window, out-of-order pkt, + * which may not carry the MP_CAPABLE opt even on mptcp enabled + * paths: always try to extract the peer key, and fallback + * for packets missing it. + * Even OoO DSS packets coming legitly after dropped or + * reordered MPC will cause fallback, but we don't have other + * options. + */ mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; } -create_msk: new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; From dea2b1ea9c705c5ba351a9174403fd83dbb68fc3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:39 -0700 Subject: [PATCH 3/4] mptcp: do not reset MP_CAPABLE subflow on mapping errors When some mapping related errors occurs we close the main MPC subflow with a RST. We should instead fallback gracefully to TCP, and do the reset only for MPJ subflows. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192 Reported-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 62 +++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c6ee811498290..ef3d037f984a9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1011,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (status == MAPPING_INVALID) { - ssk->sk_err = EBADMSG; - goto fatal; - } - if (status == MAPPING_DUMMY) { - __mptcp_do_fallback(msk); - skb = skb_peek(&ssk->sk_receive_queue); - subflow->map_valid = 1; - subflow->map_seq = READ_ONCE(msk->ack_seq); - subflow->map_data_len = skb->len; - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - return true; - } + if (unlikely(status == MAPPING_INVALID)) + goto fallback; + + if (unlikely(status == MAPPING_DUMMY)) + goto fallback; if (status != MAPPING_OK) goto no_data; @@ -1038,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk) * MP_CAPABLE-based mapping */ if (unlikely(!READ_ONCE(msk->can_ack))) { - if (!subflow->mpc_map) { - ssk->sk_err = EBADMSG; - goto fatal; - } + if (!subflow->mpc_map) + goto fallback; WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->ack_seq, subflow->map_seq); WRITE_ONCE(msk->can_ack, true); @@ -1069,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk) no_data: subflow_sched_work_if_closed(msk, ssk); return false; -fatal: - /* fatal protocol error, close the socket */ - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - ssk->sk_error_report(ssk); - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; - tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; - return false; + +fallback: + /* RFC 8684 section 3.7. */ + if (subflow->mp_join || subflow->fully_established) { + /* fatal protocol error, close the socket. + * subflow_error_report() will introduce the appropriate barriers + */ + ssk->sk_err = EBADMSG; + ssk->sk_error_report(ssk); + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; + tcp_send_active_reset(ssk, GFP_ATOMIC); + subflow->data_avail = 0; + return false; + } + + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + return true; } bool mptcp_subflow_data_available(struct sock *sk) From 69ca3d29a75554122b998e8dfa20117766f52f48 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:40 -0700 Subject: [PATCH 4/4] mptcp: update selftest for fallback due to OoO The previous commit noted that we can have fallback scenario due to OoO (or packet drop). Update the self-tests accordingly Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3c4cb72ed8a4a..9ca5f1ba461ec 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -501,6 +501,7 @@ do_transfer() local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -518,10 +519,14 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 - rets=1 + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + else + printf "[ Note ] fallback due to TCP OoO" + fi fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then