Skip to content

Commit

Permalink
tcp: be less liberal in TSEcr received while in SYN_RECV state
Browse files Browse the repository at this point in the history
Yong-Hao Zou mentioned that linux was not strict as other OS in 3WHS,
for flows using TCP TS option (RFC 7323)

As hinted by an old comment in tcp_check_req(),
we can check the TSEcr value in the incoming packet corresponds
to one of the SYNACK TSval values we have sent.

In this patch, I record the oldest and most recent values
that SYNACK packets have used.

Send a challenge ACK if we receive a TSEcr outside
of this range, and increase a new SNMP counter.

nstat -az | grep TSEcrRejected
TcpExtTSEcrRejected            0                  0.0

Due to TCP fastopen implementation, do not apply yet these checks
for fastopen flows.

v2: No longer use req->num_timeout, but treq->snt_tsval_first
    to detect when first SYNACK is prepared. This means
    we make sure to not send an initial zero TSval.
    Make sure MPTCP and TCP selftests are passing.
    Change MIB name to TcpExtTSEcrRejected

v1: https://lore.kernel.org/netdev/CADVnQykD8i4ArpSZaPKaoNxLJ2if2ts9m4As+=Jvdkrgx1qMHw@mail.gmail.com/T/

Reported-by: Yong-Hao Zou <yonghaoz1994@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250225171048.3105061-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Eric Dumazet authored and Jakub Kicinski committed Feb 27, 2025
1 parent 91c8d8e commit 3ba0752
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 11 deletions.
1 change: 1 addition & 0 deletions Documentation/networking/net_cachelines/snmp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ unsigned_long LINUX_MIB_TIMEWAITRECYCLED
unsigned_long LINUX_MIB_TIMEWAITKILLED
unsigned_long LINUX_MIB_PAWSACTIVEREJECTED
unsigned_long LINUX_MIB_PAWSESTABREJECTED
unsigned_long LINUX_MIB_TSECR_REJECTED
unsigned_long LINUX_MIB_DELAYEDACKLOST
unsigned_long LINUX_MIB_LISTENOVERFLOWS
unsigned_long LINUX_MIB_LISTENDROPS
Expand Down
2 changes: 2 additions & 0 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ struct tcp_request_sock {
u32 rcv_isn;
u32 snt_isn;
u32 ts_off;
u32 snt_tsval_first;
u32 snt_tsval_last;
u32 last_oow_ack_time; /* last SYNACK */
u32 rcv_nxt; /* the ack # by SYNACK. For
* FastOpen it's the seq#
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/snmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ enum
LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */
LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */
LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */
LINUX_MIB_DELAYEDACKS, /* DelayedACKs */
LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/syncookies.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
ireq->smc_ok = 0;

treq->snt_synack = 0;
treq->snt_tsval_first = 0;
treq->tfo_listener = false;
treq->txhash = net_tx_rndhash();
treq->rcv_isn = ntohl(th->seq) - 1;
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -7081,6 +7081,7 @@ static void tcp_openreq_init(struct request_sock *req,
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tcp_rsk(req)->snt_synack = 0;
tcp_rsk(req)->snt_tsval_first = 0;
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
Expand Down
26 changes: 15 additions & 11 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool tsecr_reject = false;
bool paws_reject = false;
bool own_req;

Expand All @@ -672,8 +673,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,

if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
if (tmp_opt.rcv_tsecr)
if (tmp_opt.rcv_tsecr) {
if (inet_rsk(req)->tstamp_ok && !fastopen)
tsecr_reject = !between(tmp_opt.rcv_tsecr,
tcp_rsk(req)->snt_tsval_first,
READ_ONCE(tcp_rsk(req)->snt_tsval_last));
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
}
/* We do not store true stamp, but it is not required,
* it can be estimated (approximately)
* from another data.
Expand Down Expand Up @@ -788,18 +794,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1))
return sk;

/* Also, it would be not so bad idea to check rcv_tsecr, which
* is essentially ACK extension and too early or too late values
* should cause reset in unsynchronized states.
*/

/* RFC793: "first check sequence number". */

if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq,
tcp_rsk(req)->rcv_nxt,
tcp_rsk(req)->rcv_nxt +
tcp_synack_window(req))) {
if (paws_reject || tsecr_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq,
tcp_rsk(req)->rcv_nxt,
tcp_rsk(req)->rcv_nxt +
tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
Expand All @@ -808,6 +810,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
else if (tsecr_reject)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECRREJECTED);
return NULL;
}

Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,12 @@ static unsigned int tcp_synack_options(const struct sock *sk,
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
tcp_rsk(req)->ts_off;
if (!tcp_rsk(req)->snt_tsval_first) {
if (!opts->tsval)
opts->tsval = ~0U;
tcp_rsk(req)->snt_tsval_first = opts->tsval;
}
WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval);
opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
Expand Down

0 comments on commit 3ba0752

Please sign in to comment.