From 752e906732c69412087f716e93baa0330cb7cce3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:07 -0700 Subject: [PATCH 01/16] mptcp: add csum_enabled in mptcp_sock This patch added a new member named csum_enabled in struct mptcp_sock, used a dummy mptcp_is_checksum_enabled() helper to initialize it. Also added a new member named mptcpi_csum_enabled in struct mptcp_info to expose the csum_enabled flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + net/mptcp/mptcp_diag.c | 1 + net/mptcp/protocol.c | 1 + net/mptcp/protocol.h | 2 ++ 4 files changed, 5 insertions(+) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 8eb3c0844bfff..7b05f7102321a 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -105,6 +105,7 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; }; /* diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index f16d9b5ee978e..8f88ddeab6a2e 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, info->mptcpi_write_seq = READ_ONCE(msk->write_seq); info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); + info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); unlock_sock_fast(sk, slow); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9930950899900..2caca0dc2c1c9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2453,6 +2453,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; + WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_init(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 89f6b73783d5c..1fc6693e257e1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -234,6 +234,7 @@ struct mptcp_sock { bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ + bool csum_enabled; spinlock_t join_list_lock; struct sock *ack_hint; struct work_struct work; @@ -525,6 +526,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); +static inline int mptcp_is_checksum_enabled(struct net *net) { return false; } void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); From d0cc298745f5abb3c43319cb9485daf3471d6f94 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:08 -0700 Subject: [PATCH 02/16] mptcp: generate the data checksum This patch added a new member named csum in struct mptcp_ext, implemented a new function named mptcp_generate_data_checksum(). Generate the data checksum in mptcp_sendmsg_frag, save it in mpext->csum. Note that we must generate the csum for zero window probe, too. Do the csum update incrementally, to avoid multiple csum computation when the data is appended to existing skb. Note that in a later patch we will skip unneeded csum related operation. Changes not included here to keep the delta small. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 1 + net/mptcp/protocol.c | 18 +++++++++++++++++- net/mptcp/protocol.h | 7 +++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 83f23774b9084..23bbd439e1152 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -23,6 +23,7 @@ struct mptcp_ext { u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u8 use_map:1, dsn64:1, data_fin:1, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2caca0dc2c1c9..f0da067301f63 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); } +/* note: this always recompute the csum on the whole skb, even + * if we just appended a single frag. More status info needed + */ +static void mptcp_update_data_checksum(struct sk_buff *skb, int added) +{ + struct mptcp_ext *mpext = mptcp_get_ext(skb); + __wsum csum = ~csum_unfold(mpext->csum); + int offset = skb->len - added; + + mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (zero_window_probe) { mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; - ret = 0; + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); + return 0; } out: + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1fc6693e257e1..4913ac7b6d199 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -336,6 +336,13 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } +struct csum_pseudo_header { + __be64 data_seq; + __be32 subflow_seq; + __be16 data_len; + __sum16 csum; +}; + struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, From 06fe1719aa501e3b574b1b2b3a7ad2ddac5fb9cb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:09 -0700 Subject: [PATCH 03/16] mptcp: add csum_reqd in mptcp_out_options This patch added a new member csum_reqd in struct mptcp_out_options and struct mptcp_subflow_request_sock. Initialized it with the helper function mptcp_is_checksum_enabled(). In mptcp_write_options, if this field is enabled, send out the MP_CAPABLE suboption with the MPTCP_CAP_CHECKSUM_REQD flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 5 +++-- net/mptcp/options.c | 11 +++++++++-- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 1 + 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 23bbd439e1152..33af68eea96f2 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -64,8 +64,9 @@ struct mptcp_out_options { struct mptcp_rm_list rm_list; u8 join_id; u8 backup; - u8 reset_reason:4; - u8 reset_transient:1; + u8 reset_reason:4, + reset_transient:1, + csum_reqd:1; u32 nonce; u64 thmac; u32 token; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 6b825fb3fa832..bb3a1f3b6e99f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -380,6 +380,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@ -435,6 +436,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; @@ -465,6 +467,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK @@ -789,6 +792,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@ -1123,7 +1127,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256; if (OPTION_MPTCP_MPC_SYN & opts->suboptions) len = TCPOLEN_MPTCP_MPC_SYN; @@ -1134,9 +1138,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, else len = TCPOLEN_MPTCP_MPC_ACK; + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD; + *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4913ac7b6d199..09e94726e030e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -347,7 +347,8 @@ struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1; + backup : 1, + csum_reqd : 1; u8 local_id; u8 remote_id; u64 local_key; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 33956337c46bc..45acab63c387e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } From c94b1f96dcfb2e5bd072b10f3429ccf28778ad58 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:10 -0700 Subject: [PATCH 04/16] mptcp: send out checksum for MP_CAPABLE with data If the checksum is enabled, send out the data checksum with the MP_CAPABLE suboption with data. In mptcp_established_options_mp, save the data checksum in opts->ext_copy.csum. In mptcp_write_options, adjust the option length and send it out with the MP_CAPABLE suboption. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 52 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bb3a1f3b6e99f..b4da08db1221a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -439,6 +439,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len; /* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@ -474,10 +475,16 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@ -1122,6 +1129,25 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1129,14 +1155,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, OPTION_MPTCP_MPC_ACK) & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; - if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } if (opts->csum_reqd) flag |= MPTCP_CAP_CHECKSUM_REQD; @@ -1159,8 +1188,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (!opts->ext_copy.data_len) goto mp_capable_done; - put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; } From c5b39e26d0036423be09c39ad142e91a2d5d278b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:11 -0700 Subject: [PATCH 05/16] mptcp: send out checksum for DSS In mptcp_write_options, if the checksum is enabled, adjust the option length and send out the data checksum with DSS suboption. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b4da08db1221a..1468774f1f871 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -478,6 +478,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, if (data_len > 0) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; opts->ext_copy.csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } @@ -545,18 +548,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool ret = false; u64 ack_seq; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; - map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; - remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + } + remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@ -1346,6 +1352,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@ -1365,8 +1374,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } } From c863225b79426459feca2ef5b0cc2f07e8e68771 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:12 -0700 Subject: [PATCH 06/16] mptcp: add sk parameter for mptcp_get_options This patch added a new parameter name sk in mptcp_get_options(). Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 5 +++-- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 10 +++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 1468774f1f871..ae69059583a73 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -323,7 +323,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, } } -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { const struct tcphdr *th = tcp_hdr(skb); @@ -1024,7 +1025,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return; } - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 09e94726e030e..a7ed0b8eb9bc4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -586,7 +586,8 @@ int __init mptcp_proto_v6_init(void); struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 45acab63c387e..aa6b307b27c84 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -151,7 +151,7 @@ static int subflow_check_req(struct request_sock *req, return -EINVAL; #endif - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -248,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, int err; subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@ -395,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@ -639,7 +639,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@ -649,7 +649,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); From 0625118115cf2ee8e435bf86d1c1f0bfdee9d7c8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:13 -0700 Subject: [PATCH 07/16] mptcp: add csum_reqd in mptcp_options_received This patch added a new flag csum_reqd in struct mptcp_options_received, if the flag MPTCP_CAP_CHECKSUM_REQD is set in the receiving MP_CAPABLE suboption, set this flag. In mptcp_sk_clone and subflow_finish_connect, if the csum_reqd flag is set, enable the msk->csum_enabled flag. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 7 ++++--- net/mptcp/protocol.c | 2 ++ net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ae69059583a73..2e2551590ecd0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -71,11 +71,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, * "If a checksum is not present when its use has been * negotiated, the receiver MUST close the subflow with a RST as * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1; mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@ -327,6 +325,8 @@ void mptcp_get_options(const struct sock *sk, const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@ -342,6 +342,7 @@ void mptcp_get_options(const struct sock *sk, mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f0da067301f63..b6e5c09305335 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2810,6 +2810,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); + if (mp_opt->csum_reqd) + WRITE_ONCE(msk->csum_enabled, true); msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a7ed0b8eb9bc4..66e5063ac6c93 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -133,6 +133,7 @@ struct mptcp_options_received { rm_addr : 1, mp_prio : 1, echo : 1, + csum_reqd : 1, backup : 1; u32 token; u32 nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index aa6b307b27c84..9b82ce635c6e6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -405,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } + if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; From 208e8f66926c5d73e3f359385c1dd49dbc48d067 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:14 -0700 Subject: [PATCH 08/16] mptcp: receive checksum for MP_CAPABLE with data This patch added a new member named csum in struct mptcp_options_received. When parsing the MP_CAPABLE with data, if the checksum is enabled, adjust the expected_opsize. If the receiving option length matches the length with the data checksum, get the checksum value and save it in mp_opt->csum. And in mptcp_incoming_options, pass it to mpext->csum. We always parse any csum/nocsum combination and delay the presence check to later code, to allow reset if missing. Additionally, in the TX path, use the newly introduce ext field to avoid MPTCP csum recomputation on TCP retransmission and unneeded csum update on when setting the data fin_flag. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 3 ++- net/mptcp/options.c | 35 ++++++++++++++++++++++++++--------- net/mptcp/protocol.h | 3 +++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 33af68eea96f2..d61bbbf119792 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -32,7 +32,8 @@ struct mptcp_ext { mpc_map:1, frozen:1, reset_transient:1; - u8 reset_reason:4; + u8 reset_reason:4, + csum_reqd:1; }; #define MPTCP_RM_IDS_MAX 8 diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2e2551590ecd0..8cbc758689696 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb, else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cfr RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken + * We parse even option with mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break; /* try to be gentle vs future versions on the initial syn */ @@ -66,11 +79,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." */ if (flags & MPTCP_CAP_CHECKSUM_REQD) mp_opt->csum_reqd = 1; @@ -84,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@ -96,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: @@ -1118,6 +1131,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if (mpext->csum_reqd) + mpext->csum = mp_opt.csum; } } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 66e5063ac6c93..76194babc7543 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -68,6 +68,8 @@ #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) + /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_HMAC_LEN 20 @@ -124,6 +126,7 @@ struct mptcp_options_received { u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u16 mp_capable : 1, mp_join : 1, fastclose : 1, From 390b95a5fb84e7999eedb021382c96d1500e01fc Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:15 -0700 Subject: [PATCH 09/16] mptcp: receive checksum for DSS In mptcp_parse_option, adjust the expected_opsize, and always parse the data checksum value from the receiving DSS regardless of csum presence. Then save it in mp_opt->csum. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8cbc758689696..1aec01686c1ad 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -182,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; } - /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@ -220,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); } break; From dd8bcd1768ff76bf2da1154897871adcc4ec078a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 17 Jun 2021 16:46:16 -0700 Subject: [PATCH 10/16] mptcp: validate the data checksum This patch added three new members named data_csum, csum_len and map_csum in struct mptcp_subflow_context, implemented a new function named mptcp_validate_data_checksum(). If the current mapping is valid and csum is enabled traverse the later pending skbs and compute csum incrementally till the whole mapping has been covered. If not enough data is available in the rx queue, return MAPPING_EMPTY - that is, no data. Next subflow_data_ready invocation will trigger again csum computation. When the full DSS is available, validate the csum and return to the caller an appropriate error code, to trigger subflow reset of fallback as required by the RFC. Additionally: - if the csum prevence in the DSS don't match the negotiated value e.g. csum present, but not requested, return invalid mapping to trigger subflow reset. - keep some csum state, to avoid re-compute the csum on the same data when multiple rx queue traversal are required. - clean-up the uncompleted mapping from the receive queue on close, to allow proper subflow disposal Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 4 ++ net/mptcp/subflow.c | 105 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 76194babc7543..12637299b42eb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -400,6 +400,8 @@ struct mptcp_subflow_context { u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@ -409,6 +411,8 @@ struct mptcp_subflow_context { pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, + map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9b82ce635c6e6..9ccc4686d0d4d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -827,10 +827,90 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -923,9 +1003,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data are fully covered by the current mapping, @@ -937,17 +1018,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure @@ -957,7 +1048,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, return MAPPING_INVALID; skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, From 4e14867d5e9185e38f730d65c89b728640d68dd1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 17 Jun 2021 16:46:17 -0700 Subject: [PATCH 11/16] mptcp: tune re-injections for csum enabled mode If the MPTCP-level checksum is enabled, on re-injections we must spool a complete DSS, or the receive side will not be able to compute the csum and process any data. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6e5c09305335..42fc7187beeef 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2375,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk) /* limit retransmission to the bytes already sent on some subflows */ info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + while (info.sent < info.limit) { if (!mptcp_alloc_tx_skb(sk, ssk)) break; @@ -2388,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk) copied += ret; info.sent += ret; } - if (copied) + if (copied) { + dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + } mptcp_set_timeout(sk, ssk); release_sock(ssk); From fe3ab1cbd357d9d0903f2d00038c2cb7141e7fe5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:18 -0700 Subject: [PATCH 12/16] mptcp: add the mib for data checksum This patch added the mib for the data checksum, MPTCP_MIB_DATACSUMERR. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 1 + net/mptcp/subflow.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index eb2dc6dbe212b..e7e60bc1fb96c 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index f0da4f060fe1a..92e56c0cfbdd8 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -18,6 +18,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ + MPTCP_MIB_DATACSUMERR, /* The data checksum fail */ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */ MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9ccc4686d0d4d..6b1cd4257edf7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -900,8 +900,10 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * header.csum = 0; csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); - if (unlikely(csum_fold(csum))) + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } return MAPPING_OK; } From fc3c82eebf8e2e193412612f509530b4ff5611bf Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:19 -0700 Subject: [PATCH 13/16] mptcp: add a new sysctl checksum_enabled This patch added a new sysctl, named checksum_enabled, to control whether DSS checksum can be enabled. Acked-by: Paolo Abeni Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- Documentation/networking/mptcp-sysctl.rst | 8 ++++++++ net/mptcp/ctrl.c | 16 ++++++++++++++++ net/mptcp/protocol.h | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 3b352e5f6300c..ee06fd782465a 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds) sysctl. Default: 120 + +checksum_enabled - BOOLEAN + Control whether DSS checksum can be enabled. + + DSS checksum can be enabled if the value is nonzero. This is a + per-namespace sysctl. + + Default: 0 diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 1ec4d36a39f05..6c2639bb9c19f 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -23,6 +23,7 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; + u8 checksum_enabled; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net) return mptcp_get_pernet(net)->add_addr_timeout; } +int mptcp_is_checksum_enabled(struct net *net) +{ + return mptcp_get_pernet(net)->checksum_enabled; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->checksum_enabled = 0; } #ifdef CONFIG_SYSCTL @@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "checksum_enabled", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; + table[2].data = &pernet->checksum_enabled; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 12637299b42eb..16e50caf200e5 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -542,7 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); -static inline int mptcp_is_checksum_enabled(struct net *net) { return false; } +int mptcp_is_checksum_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); From 401e3030e68f1c761a7137dc6f0cf39f585ab4bd Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:20 -0700 Subject: [PATCH 14/16] mptcp: dump csum fields in mptcp_dump_mpext In mptcp_dump_mpext, dump the csum fields, csum and csum_reqd in struct mptcp_dump_mpext too. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 775a46d0b0f07..6bf43176f14c1 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __field(u64, data_seq) __field(u32, subflow_seq) __field(u16, data_len) + __field(u16, csum) __field(u8, use_map) __field(u8, dsn64) __field(u8, data_fin) @@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __field(u8, frozen) __field(u8, reset_transient) __field(u8, reset_reason) + __field(u8, csum_reqd) ), TP_fast_assign( @@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->data_seq = mpext->data_seq; __entry->subflow_seq = mpext->subflow_seq; __entry->data_len = mpext->data_len; + __entry->csum = (__force u16)mpext->csum; __entry->use_map = mpext->use_map; __entry->dsn64 = mpext->dsn64; __entry->data_fin = mpext->data_fin; @@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->frozen = mpext->frozen; __entry->reset_transient = mpext->reset_transient; __entry->reset_reason = mpext->reset_reason; + __entry->csum_reqd = mpext->csum_reqd; ), - TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u", + TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u", __entry->data_ack, __entry->data_seq, __entry->subflow_seq, __entry->data_len, - __entry->use_map, __entry->dsn64, - __entry->data_fin, __entry->use_ack, - __entry->ack64, __entry->mpc_map, - __entry->frozen, __entry->reset_transient, - __entry->reset_reason) + __entry->csum, __entry->use_map, + __entry->dsn64, __entry->data_fin, + __entry->use_ack, __entry->ack64, + __entry->mpc_map, __entry->frozen, + __entry->reset_transient, __entry->reset_reason, + __entry->csum_reqd) ); DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, From 94d66ba1d8e4803066b9c6a16274343a425ed1bf Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:21 -0700 Subject: [PATCH 15/16] selftests: mptcp: enable checksum in mptcp_connect.sh This patch added a new argument "-C" for the mptcp_connect.sh script to set the sysctl checksum_enabled to 1 in ns1, ns2, ns3 and ns4 to enable the data checksum. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9ca5f1ba461ec..69351c3eb68c5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@ -22,6 +22,7 @@ sndbuf=0 rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0 if [ $tc_loss -eq 100 ];then @@ -47,6 +48,7 @@ usage() { echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do @@ -104,6 +106,9 @@ while getopts "$optstring" option;do "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2 # use TCP syn cookies, even if no flooding was detected. ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" From af66d3e1c3fa65f2187ab418b9934068049ea27a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:22 -0700 Subject: [PATCH 16/16] selftests: mptcp: enable checksum in mptcp_join.sh This patch added a new argument "-C" for the mptcp_join.sh script to set the sysctl checksum_enabled to 1 in ns1 and ns2 to enable the data checksum. In chk_join_nr, check the counter of the mib for the data checksum. Also added a new argument "-S" for the mptcp_join.sh script to start the test cases that verify the checksum handshake: * Sender and listener both have checksums off * Sender and listener both have checksums on * Sender checksums off, listener checksums on * Sender checksums on, listener checksums off The output looks like this: 01 checksum test 0 0 sum[ ok ] - csum [ ok ] 02 checksum test 1 1 sum[ ok ] - csum [ ok ] 03 checksum test 0 1 sum[ ok ] - csum [ ok ] 04 checksum test 1 0 sum[ ok ] - csum [ ok ] 05 no JOIN syn[ ok ] - synack[ ok ] - ack[ ok ] sum[ ok ] - csum [ ok ] 06 single subflow, limited by client syn[ ok ] - synack[ ok ] - ack[ ok ] sum[ ok ] - csum [ ok ] Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- .../testing/selftests/net/mptcp/mptcp_join.sh | 107 +++++++++++++++++- 1 file changed, 103 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fd99485cf2a4a..523c7797f30ac 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -12,6 +12,7 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 +checksum=0 do_all_tests=1 TEST_COUNT=0 @@ -49,6 +50,9 @@ init() ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + if [ $checksum -eq 1 ]; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done # ns1 ns2 @@ -124,6 +128,17 @@ reset_with_add_addr_timeout() -j DROP } +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable +} + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -476,6 +491,45 @@ run_tests() fi } +chk_csum_nr() +{ + local msg=${1:-""} + local count + local dump_stats + + if [ ! -z "$msg" ]; then + printf "%02u" "$TEST_COUNT" + else + echo -n " " + fi + printf " %-36s %s" "$msg" "sum" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + echo -n " - csum " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + chk_join_nr() { local msg="$1" @@ -523,6 +577,9 @@ chk_join_nr() echo Client ns stats ip netns exec $ns2 nstat -as | grep MPTcp fi + if [ $checksum -eq 1 ]; then + chk_csum_nr + fi } chk_add_nr() @@ -1374,6 +1431,37 @@ syncookies_tests() chk_add_nr 1 1 } +checksum_tests() +{ + # checksum test 0 0 + reset_with_checksum 0 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 0" + + # checksum test 1 1 + reset_with_checksum 1 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 1" + + # checksum test 0 1 + reset_with_checksum 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 1" + + # checksum test 1 0 + reset_with_checksum 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 0" +} + all_tests() { subflows_tests @@ -1387,6 +1475,7 @@ all_tests() backup_tests add_addr_ports_tests syncookies_tests + checksum_tests } usage() @@ -1403,7 +1492,9 @@ usage() echo " -b backup_tests" echo " -p add_addr_ports_tests" echo " -k syncookies_tests" + echo " -S checksum_tests" echo " -c capture pcap files" + echo " -C enable data checksum" echo " -h help" } @@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1 trap cleanup EXIT for arg in "$@"; do - # check for "capture" arg before launching tests + # check for "capture/checksum" args before launching tests if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then capture=1 fi + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then + checksum=1 + fi - # exception for the capture option, the rest means: a part of the tests - if [ "${arg}" != "-c" ]; then + # exception for the capture/checksum options, the rest means: a part of the tests + if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then do_all_tests=0 fi done @@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkch' opt; do +while getopts 'fsltra64bpkchCS' opt; do case $opt in f) subflows_tests @@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do k) syncookies_tests ;; + S) + checksum_tests + ;; c) ;; + C) + ;; h | *) usage ;;