---

yaml --- r: 66267 b: refs/heads/master c: e60402d h: refs/heads/master i: 66265: 0f37e53 66263: 3f8c960 v: v3
git-mirror · Oct 10, 2007 · 438ab1d · 438ab1d
1 parent 0288aab
commit 438ab1d
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 43 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: b9c4595bc4947faa236a849324fe3492e388d949
+refs/heads/master: e60402d0a909ca2e6e2fbdf9ed004ef0fae36d33
diff --git a/trunk/include/net/tcp.h b/trunk/include/net/tcp.h
@@ -719,6 +719,34 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
 		icsk->icsk_ca_ops->cwnd_event(sk, event);
 }
 
+/* These functions determine how the current flow behaves with respect to
+ * SACK handling. SACK is negotiated with the peer, and therefore it can
+ * vary between different flows.
+ *
+ * tcp_is_sack - SACK enabled
+ * tcp_is_reno - No SACK
+ * tcp_is_fack - FACK enabled, implies SACK enabled
+ */
+static inline int tcp_is_sack(const struct tcp_sock *tp)
+{
+	return tp->rx_opt.sack_ok;
+}
+
+static inline int tcp_is_reno(const struct tcp_sock *tp)
+{
+	return !tcp_is_sack(tp);
+}
+
+static inline int tcp_is_fack(const struct tcp_sock *tp)
+{
+	return tp->rx_opt.sack_ok & 2;
+}
+
+static inline void tcp_enable_fack(struct tcp_sock *tp)
+{
+	tp->rx_opt.sack_ok |= 2;
+}
+
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
 	return tp->sacked_out + tp->lost_out;

diff --git a/trunk/net/ipv4/tcp.c b/trunk/net/ipv4/tcp.c
@@ -2014,7 +2014,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	if (tp->rx_opt.tstamp_ok)
 		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
-	if (tp->rx_opt.sack_ok)
+	if (tcp_is_sack(tp))
 		info->tcpi_options |= TCPI_OPT_SACK;
 	if (tp->rx_opt.wscale_ok) {
 		info->tcpi_options |= TCPI_OPT_WSCALE;

diff --git a/trunk/net/ipv4/tcp_input.c b/trunk/net/ipv4/tcp_input.c
@@ -111,10 +111,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 #define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
 
-#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)
-#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
-#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
-
 #define IsSackFrto() (sysctl_tcp_frto == 0x2)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -860,6 +856,21 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 	}
 }
 
+/*
+ * Packet counting of FACK is based on in-order assumptions; therefore, TCP
+ * disables it when reordering is detected.
+ */
+static void tcp_disable_fack(struct tcp_sock *tp)
+{
+	tp->rx_opt.sack_ok &= ~2;
+}
+
+/* Take note that the peer is sending DSACKs */
+static void tcp_dsack_seen(struct tcp_sock *tp)
+{
+	tp->rx_opt.sack_ok |= 4;
+}
+
 /* Initialize metrics on socket. */
 
 static void tcp_init_metrics(struct sock *sk)
@@ -881,7 +892,7 @@ static void tcp_init_metrics(struct sock *sk)
 	}
 	if (dst_metric(dst, RTAX_REORDERING) &&
 	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
-		tp->rx_opt.sack_ok &= ~2;
+		tcp_disable_fack(tp);
 		tp->reordering = dst_metric(dst, RTAX_REORDERING);
 	}
 
@@ -943,9 +954,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 		/* This exciting event is worth to be remembered. 8) */
 		if (ts)
 			NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
-		else if (IsReno(tp))
+		else if (tcp_is_reno(tp))
 			NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
-		else if (IsFack(tp))
+		else if (tcp_is_fack(tp))
 			NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
@@ -957,8 +968,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 		       tp->sacked_out,
 		       tp->undo_marker ? tp->undo_retrans : 0);
 #endif
-		/* Disable FACK yet. */
-		tp->rx_opt.sack_ok &= ~2;
+		tcp_disable_fack(tp);
 	}
 }
 
@@ -1020,7 +1030,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
 
 	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
 		dup_sack = 1;
-		tp->rx_opt.sack_ok |= 4;
+		tcp_dsack_seen(tp);
 		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
 	} else if (num_sacks > 1) {
 		u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq));
@@ -1029,7 +1039,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
 		if (!after(end_seq_0, end_seq_1) &&
 		    !before(start_seq_0, start_seq_1)) {
 			dup_sack = 1;
-			tp->rx_opt.sack_ok |= 4;
+			tcp_dsack_seen(tp);
 			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
 		}
 	}
@@ -1326,7 +1336,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				continue;
 			if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
 			    after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
-			    (IsFack(tp) ||
+			    (tcp_is_fack(tp) ||
 			     !before(lost_retrans,
 				     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
 				     tp->mss_cache))) {
@@ -1526,7 +1536,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 
 	tp->lost_out = 0;
 	tp->retrans_out = 0;
-	if (IsReno(tp))
+	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
 	tcp_for_write_queue(skb, sk) {
@@ -1668,7 +1678,7 @@ static int tcp_check_sack_reneging(struct sock *sk)
 
 static inline int tcp_fackets_out(struct tcp_sock *tp)
 {
-	return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+	return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
 }
 
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
@@ -1872,7 +1882,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (IsFack(tp)) {
+	if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
@@ -1886,7 +1896,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!IsReno(tp) && tcp_head_timedout(sk)) {
+	if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -1938,7 +1948,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 	int decr = tp->snd_cwnd_cnt + 1;
 
 	if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
-	    (IsReno(tp) && !(flag&FLAG_NOT_DUP))) {
+	    (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) {
 		tp->snd_cwnd_cnt = decr&1;
 		decr >>= 1;
 
@@ -2029,7 +2039,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
 			NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
 		tp->undo_marker = 0;
 	}
-	if (tp->snd_una == tp->high_seq && IsReno(tp)) {
+	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 		/* Hold old state until something *above* high_seq
 		 * is ACKed. For Reno it is MUST to prevent false
 		 * fast retransmits (RFC2582). SACK TCP is safe. */
@@ -2059,7 +2069,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+	int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
 
 	if (tcp_may_undo(tp)) {
 		/* Plain luck! Hole if filled with delayed
@@ -2104,7 +2114,7 @@ static int tcp_try_undo_loss(struct sock *sk)
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
 		inet_csk(sk)->icsk_retransmits = 0;
 		tp->undo_marker = 0;
-		if (!IsReno(tp))
+		if (tcp_is_sack(tp))
 			tcp_set_ca_state(sk, TCP_CA_Open);
 		return 1;
 	}
@@ -2251,14 +2261,14 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 			if (!tp->undo_marker ||
 			    /* For SACK case do not Open to allow to undo
 			     * catching for all duplicate ACKs. */
-			    IsReno(tp) || tp->snd_una != tp->high_seq) {
+			    tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
 				tp->undo_marker = 0;
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
 
 		case TCP_CA_Recovery:
-			if (IsReno(tp))
+			if (tcp_is_reno(tp))
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
@@ -2271,7 +2281,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
-			if (IsReno(tp) && is_dupack)
+			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
 		} else
 			do_lost = tcp_try_undo_partial(sk, pkts_acked);
@@ -2288,7 +2298,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 			return;
 		/* Loss is undone; fall through to processing in Open state. */
 	default:
-		if (IsReno(tp)) {
+		if (tcp_is_reno(tp)) {
 			if (flag & FLAG_SND_UNA_ADVANCED)
 				tcp_reset_reno_sack(tp);
 			if (is_dupack)
@@ -2316,7 +2326,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		/* Otherwise enter Recovery state */
 
-		if (IsReno(tp))
+		if (tcp_is_reno(tp))
 			NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
@@ -2573,7 +2583,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk);
 
-		if (IsReno(tp))
+		if (tcp_is_reno(tp))
 			tcp_remove_reno_sacks(sk, pkts_acked);
 
 		if (ca_ops->pkts_acked) {
@@ -2599,7 +2609,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	BUG_TRAP((int)tp->sacked_out >= 0);
 	BUG_TRAP((int)tp->lost_out >= 0);
 	BUG_TRAP((int)tp->retrans_out >= 0);
-	if (!tp->packets_out && tp->rx_opt.sack_ok) {
+	if (!tp->packets_out && tcp_is_sack(tp)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		if (tp->lost_out) {
 			printk(KERN_DEBUG "Leak l=%u %d\n",
@@ -2779,7 +2789,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		return 1;
 	}
 
-	if (!IsSackFrto() || IsReno(tp)) {
+	if (!IsSackFrto() || tcp_is_reno(tp)) {
 		/* RFC4138 shortcoming in step 2; should also have case c):
 		 * ACK isn't duplicate nor advances window, e.g., opposite dir
 		 * data, winupdate
@@ -3263,7 +3273,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	 * Probably, we should reset in this case. For now drop them.
 	 */
 	__skb_queue_purge(&tp->out_of_order_queue);
-	if (tp->rx_opt.sack_ok)
+	if (tcp_is_sack(tp))
 		tcp_sack_reset(&tp->rx_opt);
 	sk_stream_mem_reclaim(sk);
 
@@ -3293,7 +3303,7 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_se
 
 static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
-	if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 		if (before(seq, tp->rcv_nxt))
 			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
 		else
@@ -3323,7 +3333,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -3639,7 +3649,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (!skb_peek(&tp->out_of_order_queue)) {
 		/* Initial out of order segment, build 1 SACK. */
-		if (tp->rx_opt.sack_ok) {
+		if (tcp_is_sack(tp)) {
 			tp->rx_opt.num_sacks = 1;
 			tp->rx_opt.dsack     = 0;
 			tp->rx_opt.eff_sacks = 1;
@@ -3704,7 +3714,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		}
 
 add_sack:
-		if (tp->rx_opt.sack_ok)
+		if (tcp_is_sack(tp))
 			tcp_sack_new_ofo_skb(sk, seq, end_seq);
 	}
 }
@@ -3893,7 +3903,7 @@ static int tcp_prune_queue(struct sock *sk)
 		 * is in a sad state like this, we care only about integrity
 		 * of the connection not performance.
 		 */
-		if (tp->rx_opt.sack_ok)
+		if (tcp_is_sack(tp))
 			tcp_sack_reset(&tp->rx_opt);
 		sk_stream_mem_reclaim(sk);
 	}
@@ -4594,8 +4604,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
-		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
-			tp->rx_opt.sack_ok |= 2;
+		if (tcp_is_sack(tp) && sysctl_tcp_fack)
+			tcp_enable_fack(tp);
 
 		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);

diff --git a/trunk/net/ipv4/tcp_minisocks.c b/trunk/net/ipv4/tcp_minisocks.c
@@ -445,7 +445,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
 		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
-				newtp->rx_opt.sack_ok |= 2;
+				tcp_enable_fack(newtp);
 		}
 		newtp->window_clamp = req->window_clamp;
 		newtp->rcv_ssthresh = req->rcv_wnd;

diff --git a/trunk/net/ipv4/tcp_output.c b/trunk/net/ipv4/tcp_output.c
@@ -737,7 +737,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 		if (diff > 0) {
 			/* Adjust Reno SACK estimate. */
-			if (!tp->rx_opt.sack_ok) {
+			if (tcp_is_reno(tp)) {
 				tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
 				tcp_verify_left_out(tp);
 			}
@@ -1728,7 +1728,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
 			tp->lost_out -= tcp_skb_pcount(next_skb);
 		/* Reno case is special. Sigh... */
-		if (!tp->rx_opt.sack_ok && tp->sacked_out)
+		if (tcp_is_reno(tp) && tp->sacked_out)
 			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
 
 		/* Not quite right: it can be > snd.fack, but
@@ -1976,7 +1976,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		return;
 
 	/* No forward retransmissions in Reno are possible. */
-	if (!tp->rx_opt.sack_ok)
+	if (tcp_is_reno(tp))
 		return;
 
 	/* Yeah, we have to make difficult choice between forward transmission

diff --git a/trunk/net/ipv4/tcp_timer.c b/trunk/net/ipv4/tcp_timer.c
@@ -315,7 +315,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (icsk->icsk_retransmits == 0) {
 		if (icsk->icsk_ca_state == TCP_CA_Disorder ||
 		    icsk->icsk_ca_state == TCP_CA_Recovery) {
-			if (tp->rx_opt.sack_ok) {
+			if (tcp_is_sack(tp)) {
 				if (icsk->icsk_ca_state == TCP_CA_Recovery)
 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
 				else