From b84235e291045799e100a40c485439182087bae9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:32 -0800 Subject: [PATCH 01/11] tcp: do not set snd_ssthresh in tcp_create_openreq_child() New sockets get the field set to TCP_INFINITE_SSTHRESH in tcp_init_sock() In case a socket had this field changed and transitions to TCP_LISTEN state, tcp_disconnect() also makes sure snd_ssthresh is set to TCP_INFINITE_SSTHRESH. So a listener has this field set to TCP_INFINITE_SSTHRESH already. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 12affb7864d98..0cffa5937b126 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->packets_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; - newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; From 6a408147eac4a7e51ead1bcf939a71b64c2daadf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:33 -0800 Subject: [PATCH 02/11] tcp: move icsk_rto init to tcp_disconnect() If we make sure a listener always has its icsk_rto field set to TCP_TIMEOUT_INIT, we do not need to rewrite this field after a new clone is created. tcp_disconnect() is very seldom used in real applications. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_minisocks.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27e2f68370623..9d8131f95a973 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2579,6 +2579,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0cffa5937b126..9799f1d32e44f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -482,7 +482,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); - newicsk->icsk_rto = TCP_TIMEOUT_INIT; newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->packets_out = 0; From eb2c80ca87b10814e3067db9c0215701a99dc84f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:34 -0800 Subject: [PATCH 03/11] tcp: do not clear packets_out in tcp_create_openreq_child() New sockets have this field cleared, and tcp_disconnect() calls tcp_write_queue_purge() which, among other things, also clears tp->packets_out. So a listener is guaranteed to have this field cleared. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_minisocks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9799f1d32e44f..830e4eb558f88 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -484,7 +484,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; - newtp->packets_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; newtp->tlp_high_seq = 0; From a0070e463f3fb303b64dea60fe2b0a2e60fce61c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:35 -0800 Subject: [PATCH 04/11] tcp: do not clear srtt_us in tcp_create_openreq_child All listeners have this field cleared already, since tcp_disconnect() clears it, and newly created sockets also have a zero value here. So a clone will inherit a zero value here. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 830e4eb558f88..977ad1c0cccad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -479,7 +479,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; From b9e2e689aab293c3da0ceac0921449a07b692f1f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:36 -0800 Subject: [PATCH 05/11] tcp: move mdev_us init to tcp_disconnect() If we make sure a listener always has its mdev_us field set to TCP_TIMEOUT_INIT, we do not need to rewrite this field after a new clone is created. tcp_disconnect() is very seldom used in real applications. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_minisocks.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9d8131f95a973..731b1c6e88a99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2572,6 +2572,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 977ad1c0cccad..35906b44e2cfa 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -479,7 +479,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_init_wl(newtp, treq->rcv_isn); - newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; From 3a9a57f637943404920a8945323dc733845e697c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:37 -0800 Subject: [PATCH 06/11] tcp: move snd_cwnd & snd_cwnd_cnt init to tcp_disconnect() Passive connections can inherit proper value by cloning, if we make sure all listeners have the proper values there. tcp_disconnect() was setting snd_cwnd to 2, which seems quite obsolete since IW10 adoption. Also remove an obsolete comment. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_minisocks.c | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 731b1c6e88a99..3f99ad92eaedb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2578,10 +2578,10 @@ int tcp_disconnect(struct sock *sk, int flags) if (tp->write_seq == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tp->delivered_ce = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 35906b44e2cfa..ca96e7da99b6e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -490,14 +490,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - newtp->snd_cwnd = TCP_INIT_CWND; - newtp->snd_cwnd_cnt = 0; - /* There's a bubble in the pipe until at least the first ACK. */ newtp->app_limited = ~0U; From 5d836764620dfa3f243bdf5a678db300341195ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:38 -0800 Subject: [PATCH 07/11] tcp: do not clear urg_data in tcp_create_openreq_child All listeners have this field cleared already, since tcp_disconnect() clears it, and newly created sockets also have a zero value here. So a clone will inherit a zero value here. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_minisocks.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ca96e7da99b6e..2747aa72b8e2b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -501,8 +501,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.dsack = 0; newtp->rx_opt.num_sacks = 0; - newtp->urg_data = 0; - if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); From 5c701549c9a653a4335dbb2aecb4935de442b87d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:39 -0800 Subject: [PATCH 08/11] tcp: move retrans_out, sacked_out, tlp_high_seq, last_oow_ack_time init to tcp_disconnect() If we make sure all listeners have these fields cleared, then a clone will also inherit zero values. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++++ net/ipv4/tcp_minisocks.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3f99ad92eaedb..551ad8604bea0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2605,6 +2605,10 @@ int tcp_disconnect(struct sock *sk, int flags) tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; tp->reord_seen = 0; + tp->retrans_out = 0; + tp->sacked_out = 0; + tp->tlp_high_seq = 0; + tp->last_oow_ack_time = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2747aa72b8e2b..29fba13849a79 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -482,12 +482,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; - newtp->last_oow_ack_time = 0; 
newtp->total_retrans = req->num_retrans; /* There's a bubble in the pipe until at least the first ACK. */ From 6cda8b7493ac323c3b58a9a897abc0e6432d5a1d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:40 -0800 Subject: [PATCH 09/11] tcp: move app_limited init to tcp_disconnect() If we make sure all listeners have app_limited set to ~0U, then a clone will also inherit proper initial value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_minisocks.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 551ad8604bea0..5f15fcc9612ae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2609,6 +2609,9 @@ int tcp_disconnect(struct sock *sk, int flags) tp->sacked_out = 0; tp->tlp_high_seq = 0; tp->last_oow_ack_time = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 29fba13849a79..13f3c6444efa6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -486,9 +486,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newsk->sk_txhash = treq->txhash; newtp->total_retrans = req->num_retrans; - /* There's a bubble in the pipe until at least the first ACK. */ - newtp->app_limited = ~0U; - tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; From 792c4354a508c42c69f4771287cb99dde4ab79be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:41 -0800 Subject: [PATCH 10/11] tcp: move tp->rack init to tcp_disconnect() If we make sure all listeners have proper tp->rack value, then a clone will also inherit proper initial value. Note that fresh sockets init tp->rack from tcp_init_sock() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 6 ++++++ net/ipv4/tcp_minisocks.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5f15fcc9612ae..2348199e6cee9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2611,6 +2611,12 @@ int tcp_disconnect(struct sock *sk, int flags) tp->last_oow_ack_time = 0; /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; + tp->rack.mstamp = 0; + tp->rack.advanced = 0; + tp->rack.reo_wnd_steps = 1; + tp->rack.last_delivered = 0; + tp->rack.reo_wnd_persist = 0; + tp->rack.dsack_seen = 0; /* Clean up fastopen related fields */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 13f3c6444efa6..27e11eb4164ca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -535,12 +535,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; - newtp->rack.mstamp = 0; - newtp->rack.advanced = 0; - newtp->rack.reo_wnd_steps = 1; - newtp->rack.last_delivered = 0; - newtp->rack.reo_wnd_persist = 0; - newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); From 6bcdc40dddfe79408e809ec1e2c13f08c863c0b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Jan 2019 11:23:42 -0800 Subject: [PATCH 11/11] tcp: move rx_opt & syn_data_acked init to tcp_disconnect() If we make sure all listeners have these fields cleared, then a clone will also inherit zero values. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 4 ++++ net/ipv4/tcp_minisocks.c | 6 ------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2348199e6cee9..541bdb9f81d79 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2617,6 +2617,10 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rack.last_delivered = 0; tp->rack.reo_wnd_persist = 0; tp->rack.dsack_seen = 0; + tp->syn_data_acked = 0; + tp->rx_opt.saw_tstamp = 0; + tp->rx_opt.dsack = 0; + tp->rx_opt.num_sacks = 0; /* Clean up fastopen related fields */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 27e11eb4164ca..182595e2d40ff 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -489,11 +489,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; - newtp->rx_opt.saw_tstamp = 0; - - newtp->rx_opt.dsack = 0; - newtp->rx_opt.num_sacks = 0; - if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); @@ -534,7 +529,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; - newtp->syn_data_acked = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);