Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 368583
b: refs/heads/master
c: e33099f
h: refs/heads/master
i:
  368581: 7a6d44d
  368579: 4385be0
  368575: bb9b152
v: v3
  • Loading branch information
Yuchung Cheng authored and David S. Miller committed Mar 21, 2013
1 parent 91ced13 commit 49724df
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 27 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: ab42d9ee3d215ab74a49818ffc53771a88ce7ddf
refs/heads/master: e33099f96d99c391b3325caa9c44258de04aae86
18 changes: 6 additions & 12 deletions trunk/Documentation/networking/ip-sysctl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,19 +225,13 @@ tcp_fin_timeout - INTEGER
Default: 60 seconds

tcp_frto - INTEGER
Enables Forward RTO-Recovery (F-RTO) defined in RFC4138.
Enables Forward RTO-Recovery (F-RTO) defined in RFC5682.
F-RTO is an enhanced recovery algorithm for TCP retransmission
timeouts. It is particularly beneficial in wireless environments
where packet loss is typically due to random radio interference
rather than intermediate router congestion. F-RTO is sender-side
only modification. Therefore it does not require any support from
the peer.

If set to 1, basic version is enabled. 2 enables SACK enhanced
F-RTO if flow uses SACK. The basic version can be used also when
SACK is in use though scenario(s) with it exists where F-RTO
interacts badly with the packet counting of the SACK enabled TCP
flow.
timeouts. It is particularly beneficial in networks where the
RTT fluctuates (e.g., wireless). F-RTO is a sender-side-only
modification. It does not require any support from the peer.

By default it's enabled with a non-zero value. 0 disables F-RTO.

tcp_keepalive_time - INTEGER
How often TCP sends out keepalive messages when keepalive is enabled.
Expand Down
3 changes: 2 additions & 1 deletion trunk/include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ struct tcp_sock {
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
thin_dupack : 1,/* Fast retransmit on first dupack */
repair : 1;
repair : 1,
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
syn_data:1, /* SYN includes data */
Expand Down
73 changes: 60 additions & 13 deletions trunk/net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
Expand Down Expand Up @@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
tcp_highest_sack_seq(tp)))
state->reord = min(fack_count,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
}

if (sacked & TCPCB_LOST) {
Expand Down Expand Up @@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
bool new_recovery = false;

/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
new_recovery = true;
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
Expand Down Expand Up @@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);

/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
tp->frto = sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}

/* If ACK arrived pointing to a remembered SACK, it means that our
Expand Down Expand Up @@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
return failed;
}

/* Undo during loss recovery after partial ACK. */
static bool tcp_try_undo_loss(struct sock *sk)
/* Undo during loss recovery after partial ACK or using F-RTO. */
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
struct tcp_sock *tp = tcp_sk(sk);

if (tcp_may_undo(tp)) {
if (frto_undo || tcp_may_undo(tp)) {
struct sk_buff *skb;
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
Expand All @@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
tp->lost_out = 0;
tcp_undo_cwr(sk, true);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
if (frto_undo)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
if (tcp_is_sack(tp))
if (frto_undo || tcp_is_sack(tp))
tcp_set_ca_state(sk, TCP_CA_Open);
return true;
}
Expand Down Expand Up @@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
static void tcp_process_loss(struct sock *sk, int flag)
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);

if (!before(tp->snd_una, tp->high_seq)) {
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
if (flag & FLAG_ORIG_SACK_ACKED) {
/* Step 3.b. A timeout is spurious if not all data are
* lost, i.e., never-retransmitted data are (s)acked.
*/
tcp_try_undo_loss(sk, true);
return;
}
if (after(tp->snd_nxt, tp->high_seq) &&
(flag & FLAG_DATA_SACKED || is_dupack)) {
tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
__tcp_push_pending_frames(sk, tcp_current_mss(sk),
TCP_NAGLE_OFF);
if (after(tp->snd_nxt, tp->high_seq))
return; /* Step 2.b */
tp->frto = 0;
}
}

if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}

if (flag & FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
if (tcp_try_undo_loss(sk))
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)transmissions.
*/
if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
tcp_add_reno_sack(sk);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
if (tcp_try_undo_loss(sk, false))
return;
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
}

Expand Down Expand Up @@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag);
tcp_process_loss(sk, flag, is_dupack);
if (icsk->icsk_ca_state != TCP_CA_Open)
return;
/* Fall through to processing in Open state. */
Expand Down Expand Up @@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
if (!(sacked & TCPCB_SACKED_ACKED))
reord = min(pkts_acked, reord);
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}

if (sacked & TCPCB_SACKED_ACKED)
Expand Down

0 comments on commit 49724df

Please sign in to comment.