From 179ac35f2f78ec54f14b6f3134b593403fd4c467 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng <ycheng@google.com> Date: Fri, 25 Sep 2020 10:04:28 -0700 Subject: [PATCH 1/4] tcp: consistently check retransmit hint tcp_simple_retransmit() used for path MTU discovery may not adjust the retransmit hint properly by deducting retrans_out before checking it to adjust the hint. This patch fixes this by a correct routine tcp_mark_skb_lost() already used by the RACK loss detection. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp_input.c | 9 ++------- net/ipv4/tcp_recovery.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 748980862a09e..f9f4321d18b3b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2675,13 +2675,8 @@ void tcp_simple_retransmit(struct sock *sk) skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { if (tcp_skb_seglen(skb) > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_skb_mark_lost_uncond_verify(tp, skb); - } + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + tcp_mark_skb_lost(sk, skb); } tcp_clear_retrans_hints_partial(tp); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fdb715bdd2d11..26a42289a8703 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -246,6 +246,6 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced) tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, mss, mss, GFP_ATOMIC); - tcp_skb_mark_lost_uncond_verify(tp, skb); + tcp_mark_skb_lost(sk, skb); } } From fd2146741c98569c8b4dc41b8ccae597150b122c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng <ycheng@google.com> Date: Fri, 25 Sep 2020 10:04:29 -0700 Subject: [PATCH 2/4] tcp: move tcp_mark_skb_lost A pure refactor to move tcp_mark_skb_lost to tcp_input.c to prepare for the later loss marking consolidation. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp_input.c | 14 ++++++++++++++ net/ipv4/tcp_recovery.c | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f9f4321d18b3b..1cf4fcf8f41bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1016,6 +1016,20 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) tp->retransmit_skb_hint = skb; } +void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_skb_mark_lost_uncond_verify(tp, skb); + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + /* Account for retransmits that are lost again */ + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); + } +} + /* Sum the number of packets on the wire we have marked as lost. * There are two cases we care about here: * a) Packet hasn't been marked lost (nor retransmitted), diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 26a42289a8703..f65a3ddd0d58a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -2,20 +2,6 @@ #include <linux/tcp.h> #include <net/tcp.h> -void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_skb_mark_lost_uncond_verify(tp, skb); - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - /* Account for retransmits that are lost again */ - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, - tcp_skb_pcount(skb)); - } -} - static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { return t1 > t2 || (t1 == t2 && after(seq1, seq2)); From 686989700cabc444f25c1f1eb022aca8f2fc14b2 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng <ycheng@google.com> Date: Fri, 25 Sep 2020 10:04:30 -0700 Subject: [PATCH 3/4] tcp: simplify tcp_mark_skb_lost This patch consolidates and simplifes the loss marking logic used by a few loss detections (RACK, RFC6675, NewReno). Previously each detection uses a subset of several intertwined subroutines. This unncessary complexity has led to bugs (and fixes of bug fixes). tcp_mark_skb_lost now is the single one routine to mark a packet loss when a loss detection caller deems an skb ist lost: 1. rewind tp->retransmit_hint_skb if skb has lower sequence or all lost ones have been retransmitted. 2. book-keeping: adjust flags and counts depending on if skb was retransmitted or not. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp_input.c | 59 +++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1cf4fcf8f41bf..63417c400b53f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1006,7 +1006,11 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } -/* This must be called before lost_out is incremented */ + /* This must be called before lost_out or retrans_out are updated + * on a new loss, because we want to know if all skbs previously + * known to be lost have already been retransmitted, indicating + * that this newly lost skb is our next skb to retransmit. + */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || @@ -1018,32 +1022,25 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { + __u8 sacked = TCP_SKB_CB(skb)->sacked; struct tcp_sock *tp = tcp_sk(sk); - tcp_skb_mark_lost_uncond_verify(tp, skb); - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - /* Account for retransmits that are lost again */ - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, - tcp_skb_pcount(skb)); - } -} - -/* Sum the number of packets on the wire we have marked as lost. - * There are two cases we care about here: - * a) Packet hasn't been marked lost (nor retransmitted), - * and this is the first loss. - * b) Packet has been marked both lost and retransmitted, - * and this means we think it was lost again. - */ -static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb) -{ - __u8 sacked = TCP_SKB_CB(skb)->sacked; + if (sacked & TCPCB_SACKED_ACKED) + return; - if (!(sacked & TCPCB_LOST) || - ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS))) - tp->lost += tcp_skb_pcount(skb); + tcp_verify_retransmit_hint(tp, skb); + if (sacked & TCPCB_LOST) { + if (sacked & TCPCB_SACKED_RETRANS) { + /* Account for retransmits that are lost again */ + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); + } + } else { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } } static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) @@ -1057,17 +1054,6 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) -{ - tcp_verify_retransmit_hint(tp, skb); - - tcp_sum_lost(tp, skb); - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } -} - /* Updates the delivered and delivered_ce counts */ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, bool ece_ack) @@ -2688,8 +2674,7 @@ void tcp_simple_retransmit(struct sock *sk) unsigned int mss = tcp_current_mss(sk); skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { - if (tcp_skb_seglen(skb) > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + if (tcp_skb_seglen(skb) > mss) tcp_mark_skb_lost(sk, skb); } From 534a2109fb0c31786c98845aad18d069fb874e6c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng <ycheng@google.com> Date: Fri, 25 Sep 2020 10:04:31 -0700 Subject: [PATCH 4/4] tcp: consolidate tcp_mark_skb_lost and tcp_skb_mark_lost tcp_skb_mark_lost is used by RFC6675-SACK and can easily be replaced with the new tcp_mark_skb_lost handler. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp_input.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 63417c400b53f..f7b3e37d21985 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1043,17 +1043,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } -static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) -{ - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tcp_verify_retransmit_hint(tp, skb); - - tp->lost_out += tcp_skb_pcount(skb); - tcp_sum_lost(tp, skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } -} - /* Updates the delivered and delivered_ce counts */ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, bool ece_ack) @@ -2308,7 +2297,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) break; - tcp_skb_mark_lost(tp, skb); + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) + tcp_mark_skb_lost(sk, skb); if (mark_head) break;