Skip to content

Commit

Permalink
Merge branch 'mptcp-tcp-fallback'
Browse files Browse the repository at this point in the history
Mat Martineau says:

====================
mptcp: TCP fallback for established connections

RFC 8684 allows some MPTCP connections to fall back to regular TCP when
the MPTCP DSS checksum detects middlebox interference, there is only a
single subflow, and there is no unacknowledged out-of-sequence
data. When this condition is detected, the stack sends an MPTCP DSS
option with an "infinite mapping" to signal that a fallback is
happening, and the peers will stop sending MPTCP options in their TCP
headers. The Linux MPTCP stack has not yet supported this type of
fallback, instead closing the connection when the MPTCP checksum fails.

This series adds support for fallback to regular TCP in a more limited
scenario, for only MPTCP connections that have never connected
additional subflows or transmitted out-of-sequence data. The selftests
are also updated to check new MIBs that track infinite mappings.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 23, 2022
2 parents 31693d0 + 8bd03be commit 988998a
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 31 deletions.
3 changes: 2 additions & 1 deletion include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ struct mptcp_ext {
frozen:1,
reset_transient:1;
u8 reset_reason:4,
csum_reqd:1;
csum_reqd:1,
infinite_map:1;
};

#define MPTCP_RM_IDS_MAX 8
Expand Down
6 changes: 4 additions & 2 deletions include/trace/events/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u8, reset_transient)
__field(u8, reset_reason)
__field(u8, csum_reqd)
__field(u8, infinite_map)
),

TP_fast_assign(
Expand All @@ -102,17 +103,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->reset_transient = mpext->reset_transient;
__entry->reset_reason = mpext->reset_reason;
__entry->csum_reqd = mpext->csum_reqd;
__entry->infinite_map = mpext->infinite_map;
),

TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u infinite_map=%u",
__entry->data_ack, __entry->data_seq,
__entry->subflow_seq, __entry->data_len,
__entry->csum, __entry->use_map,
__entry->dsn64, __entry->data_fin,
__entry->use_ack, __entry->ack64,
__entry->mpc_map, __entry->frozen,
__entry->reset_transient, __entry->reset_reason,
__entry->csum_reqd)
__entry->csum_reqd, __entry->infinite_map)
);

DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag,
Expand Down
1 change: 1 addition & 0 deletions net/mptcp/mib.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
Expand Down
1 change: 1 addition & 0 deletions net/mptcp/mib.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
Expand Down
8 changes: 6 additions & 2 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,

opts->suboptions = 0;

if (unlikely(__mptcp_check_fallback(msk)))
if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
return false;

if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
Expand Down Expand Up @@ -1340,8 +1340,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
/* data_len == 0 is reserved for the infinite mapping,
* the checksum will also be set to 0.
*/
put_unaligned_be32(mpext->data_len << 16 |
mptcp_make_csum(mpext), ptr);
(mpext->data_len ? mptcp_make_csum(mpext) : 0),
ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
Expand Down
6 changes: 6 additions & 0 deletions net/mptcp/pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,13 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)

/* Path-manager hook taking the subflow socket @sk and the reported
 * failing sequence number @fail_seq.
 *
 * @fail_seq is only logged.  When mptcp_has_another_subflow() reports no
 * other subflow and the owning msk still has allow_infinite_fallback set,
 * the subflow is flagged with send_infinite_map so that the transmit path
 * can emit an infinite mapping.
 */
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	/* Terminate the record with '\n': no pr_cont() follows this message,
	 * so leaving it open serves no purpose.
	 */
	pr_debug("fail_seq=%llu\n", fail_seq);

	if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
		subflow->send_infinite_map = 1;
}

/* path manager helpers */
Expand Down
21 changes: 21 additions & 0 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}

/* Convert the mapping described by @mpext into an infinite mapping and
 * switch @msk to fallback mode.
 *
 * A NULL @mpext makes this a no-op.  Otherwise the extension is marked
 * as an infinite mapping with a zero data_len, the InfiniteMapTx MIB
 * counter is bumped, the pending send_infinite_map request on @ssk is
 * cleared and the fallback is logged and performed on @msk.
 */
static void mptcp_update_infinite_map(struct mptcp_sock *msk,
				      struct sock *ssk,
				      struct mptcp_ext *mpext)
{
	if (mpext) {
		/* a zero data_len goes together with the infinite_map flag */
		mpext->data_len = 0;
		mpext->infinite_map = 1;

		/* the request has been honoured, do not send it again */
		mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);

		pr_fallback(msk);
		__mptcp_do_fallback(msk);
	}
}

static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
Expand Down Expand Up @@ -1360,6 +1376,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
if (mptcp_subflow_ctx(ssk)->send_infinite_map)
mptcp_update_infinite_map(msk, ssk, mpext);
trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
Expand Down Expand Up @@ -2465,6 +2483,7 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
WRITE_ONCE(msk->allow_infinite_fallback, false);
}

release_sock(ssk);
Expand Down Expand Up @@ -2539,6 +2558,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;

mptcp_pm_data_init(msk);
Expand Down Expand Up @@ -3275,6 +3295,7 @@ bool mptcp_finish_join(struct sock *ssk)
}

subflow->map_seq = READ_ONCE(msk->ack_seq);
WRITE_ONCE(msk->allow_infinite_fallback, false);

out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
Expand Down
13 changes: 13 additions & 0 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
bool allow_infinite_fallback;
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
Expand Down Expand Up @@ -440,6 +441,7 @@ struct mptcp_subflow_context {
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
send_infinite_map : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
Expand Down Expand Up @@ -876,6 +878,17 @@ static inline void mptcp_do_fallback(struct sock *sk)

/* Debug trace emitted when socket @a falls back to plain TCP; the message
 * is newline-terminated so that it forms a complete log record.
 */
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)

/* Return true only when @skb is non-NULL, carries an MPTCP extension and
 * that extension has its infinite_map bit set; false in every other case.
 */
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
	struct mptcp_ext *ext;

	if (!skb)
		return false;

	ext = mptcp_get_ext(skb);
	return ext && ext->infinite_map;
}

static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Expand Down
57 changes: 32 additions & 25 deletions net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,

data_len = mpext->data_len;
if (data_len == 0) {
pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
subflow->map_data_len = 0;
return MAPPING_INVALID;
}

Expand Down Expand Up @@ -1203,35 +1205,39 @@ static bool subflow_check_data_avail(struct sock *ssk)
return false;

fallback:
/* RFC 8684 section 3.7. */
if (subflow->send_mp_fail) {
if (mptcp_has_another_subflow(ssk)) {
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
if (!__mptcp_check_fallback(msk)) {
/* RFC 8684 section 3.7. */
if (subflow->send_mp_fail) {
if (mptcp_has_another_subflow(ssk) ||
!READ_ONCE(msk->allow_infinite_fallback)) {
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
}
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return true;
}
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return true;
}

if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return false;
if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return false;
}

__mptcp_do_fallback(msk);
}

__mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
Expand Down Expand Up @@ -1483,6 +1489,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
return err;

failed_unlink:
Expand Down
36 changes: 35 additions & 1 deletion tools/testing/selftests/net/mptcp/mptcp_join.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,38 @@ chk_rst_nr()
echo "$extra_msg"
}

# chk_infi_nr <expected_tx> <expected_rx>
#
# Compare the InfiniteMapTx counter read in $ns2 and the InfiniteMapRx
# counter read in $ns1 against the expected values.  A missing counter is
# treated as 0.  Each mismatch prints a "[fail]" line and calls fail_test;
# if any mismatch occurred the stats are dumped at the end.
chk_infi_nr()
{
	local want_tx=$1
	local want_rx=$2
	local seen
	local need_dump

	# infinite mappings sent, as seen from $ns2
	printf "%-${nr_blank}s %s" " " "itx"
	seen=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
	seen=${seen:-0}
	if [ "$seen" = "$want_tx" ]; then
		echo -n "[ ok ]"
	else
		echo "[fail] got $seen infinite map[s] TX expected $want_tx"
		fail_test
		need_dump=1
	fi

	# infinite mappings received, as seen from $ns1
	echo -n " - infirx"
	seen=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
	seen=${seen:-0}
	if [ "$seen" = "$want_rx" ]; then
		echo "[ ok ]"
	else
		echo "[fail] got $seen infinite map[s] RX expected $want_rx"
		fail_test
		need_dump=1
	fi

	[ "${need_dump}" = 1 ] && dump_stats
}

chk_join_nr()
{
local syn_nr=$1
Expand All @@ -1115,7 +1147,8 @@ chk_join_nr()
local csum_ns2=${5:-0}
local fail_nr=${6:-0}
local rst_nr=${7:-0}
local corrupted_pkts=${8:-0}
local infi_nr=${8:-0}
local corrupted_pkts=${9:-0}
local count
local dump_stats
local with_cookie
Expand Down Expand Up @@ -1170,6 +1203,7 @@ chk_join_nr()
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
chk_infi_nr $infi_nr $infi_nr
fi
}

Expand Down

0 comments on commit 988998a

Please sign in to comment.