Skip to content

Commit

Permalink
rxrpc: Implement slow-start
Browse files Browse the repository at this point in the history
Implement RxRPC slow-start, which is similar to RFC 5681 for TCP.  A
tracepoint is added to log the state of the congestion management algorithm
and the decisions it makes.

Notes:

 (1) Since we send fixed-size DATA packets (apart from the final packet in
     each phase), counters and calculations are in terms of packets rather
     than bytes.

 (2) The ACK packet carries the equivalent of TCP SACK.

 (3) The FLIGHT_SIZE calculation in RFC 5681 doesn't seem particularly
     suited to SACK of a small number of packets.  It seems that, almost
     inevitably, by the time three 'duplicate' ACKs have been seen, we have
     narrowed the loss down to one or two missing packets, and the
     FLIGHT_SIZE calculation ends up as 2.

 (4) In rxrpc_resend(), if there was no data that apparently needed
     retransmission, we transmit a PING ACK to ask the peer to tell us what
     its Rx window state is.

Signed-off-by: David Howells <dhowells@redhat.com>
  • Loading branch information
David Howells committed Sep 24, 2016
1 parent 0d96796 commit 5749434
Show file tree
Hide file tree
Showing 9 changed files with 339 additions and 13 deletions.
45 changes: 45 additions & 0 deletions include/trace/events/rxrpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,51 @@ TRACE_EVENT(rxrpc_retransmit,
__entry->expiry)
);

/*
 * rxrpc_congest tracepoint: log the congestion-management state of a call
 * alongside the summary of the ACK being processed and the change reported
 * by the caller.
 *
 * Arguments:
 *   call       - the call; tx_hard_ack, tx_top and acks_lowest_nak are sampled
 *                from it when the event fires.
 *   summary    - ACK summary; copied whole into the trace record.
 *   ack_serial - serial number supplied by the caller, printed as %08x.
 *   change     - index into rxrpc_congest_changes[] naming the change.
 *
 * Output, in order:
 *   c=   call pointer, followed by ack_serial, the ACK reason name
 *        (rxrpc_ack_names[]), tx_hard_ack and the mode name
 *        (rxrpc_congest_modes[])
 *   cw=  summary cwnd
 *   ss=  summary ssthresh
 *   nr=  nr_acks,nr_nacks
 *   nw=  nr_new_acks,nr_new_nacks
 *   r=   nr_rot_new_acks
 *   b=   tx_top - tx_hard_ack
 *   u=   cumulative_acks
 *   d=   dup_acks
 *   l=   acks_lowest_nak, suffixed with "!" when new_low_nack is set
 *   then the change name, and " rTxTo" when retrans_timeo is set.
 */
TRACE_EVENT(rxrpc_congest,
TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
rxrpc_serial_t ack_serial, enum rxrpc_congest_change change),

TP_ARGS(call, summary, ack_serial, change),

TP_STRUCT__entry(
__field(struct rxrpc_call *, call )
__field(enum rxrpc_congest_change, change )
__field(rxrpc_seq_t, hard_ack )
__field(rxrpc_seq_t, top )
__field(rxrpc_seq_t, lowest_nak )
__field(rxrpc_serial_t, ack_serial )
__field_struct(struct rxrpc_ack_summary, sum )
),

TP_fast_assign(
__entry->call = call;
__entry->change = change;
__entry->hard_ack = call->tx_hard_ack;
__entry->top = call->tx_top;
__entry->lowest_nak = call->acks_lowest_nak;
__entry->ack_serial = ack_serial;
/* Snapshot the whole summary so every field printed below comes from
 * the record rather than from the caller's structure.
 */
memcpy(&__entry->sum, summary, sizeof(__entry->sum));
),

TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
__entry->ack_serial,
rxrpc_ack_names[__entry->sum.ack_reason],
__entry->hard_ack,
rxrpc_congest_modes[__entry->sum.mode],
__entry->sum.cwnd,
__entry->sum.ssthresh,
__entry->sum.nr_acks, __entry->sum.nr_nacks,
__entry->sum.nr_new_acks, __entry->sum.nr_new_nacks,
__entry->sum.nr_rot_new_acks,
__entry->top - __entry->hard_ack,
__entry->sum.cumulative_acks,
__entry->sum.dup_acks,
__entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "",
rxrpc_congest_changes[__entry->change],
__entry->sum.retrans_timeo ? " rTxTo" : "")
);

#endif /* _TRACE_RXRPC_H */

/* This part must be outside protection */
Expand Down
53 changes: 52 additions & 1 deletion net/rxrpc/ar-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
};

/*
Expand Down Expand Up @@ -446,6 +447,17 @@ enum rxrpc_call_completion {
NR__RXRPC_CALL_COMPLETIONS
};

/*
 * Tx congestion management modes for a call.
 *
 * Values are kept in 8-bit bitfields (the cong_mode member of struct
 * rxrpc_call and the mode member of struct rxrpc_ack_summary) and are used
 * directly as an index into rxrpc_congest_modes[] by the rxrpc_congest
 * tracepoint, so numbering starts at zero and must stay dense.
 *
 * NOTE(review): the mode names appear to follow the RFC 5681 phases
 * referenced by the commit description; the transitions between them are
 * not defined here.
 */
enum rxrpc_congest_mode {
	RXRPC_CALL_SLOW_START = 0,
	RXRPC_CALL_CONGEST_AVOIDANCE,
	RXRPC_CALL_PACKET_LOSS,
	RXRPC_CALL_FAST_RETRANSMIT,
	NR__RXRPC_CONGEST_MODES		/* Number of modes; sizes the name table */
};

/*
* RxRPC call definition
* - matched by { connection, call_id }
Expand Down Expand Up @@ -518,6 +530,20 @@ struct rxrpc_call {
* not hard-ACK'd packet follows this.
*/
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */

/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */

rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not
* consumed packet follows this.
*/
Expand All @@ -539,12 +565,13 @@ struct rxrpc_call {
ktime_t ackr_ping_time; /* Time last ping sent */

/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
};

/*
 * Summary of a new ACK and the changes it made to the Tx buffer packet states.
*/
struct rxrpc_ack_summary {
u8 ack_reason;
Expand All @@ -554,6 +581,14 @@ struct rxrpc_ack_summary {
u8 nr_new_nacks; /* Number of new NACKs in packet */
u8 nr_rot_new_acks; /* Number of rotated new ACKs */
bool new_low_nack; /* T if new low NACK found */
bool retrans_timeo; /* T if reTx due to timeout happened */
u8 flight_size; /* Number of unreceived transmissions */
/* Place to stash values for tracing */
enum rxrpc_congest_mode mode:8;
u8 cwnd;
u8 ssthresh;
u8 dup_acks;
u8 cumulative_acks;
};

enum rxrpc_skb_trace {
Expand Down Expand Up @@ -709,6 +744,7 @@ extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8];
enum rxrpc_propose_ack_trace {
rxrpc_propose_ack_client_tx_end,
rxrpc_propose_ack_input_data,
rxrpc_propose_ack_ping_for_lost_ack,
rxrpc_propose_ack_ping_for_lost_reply,
rxrpc_propose_ack_ping_for_params,
rxrpc_propose_ack_respond_to_ack,
Expand All @@ -729,6 +765,21 @@ enum rxrpc_propose_ack_outcome {
extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8];
extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes];

/*
 * Congestion management change codes reported through the rxrpc_congest
 * tracepoint.  The value passed as the tracepoint's "change" argument is used
 * directly as an index into rxrpc_congest_changes[], so numbering starts at
 * zero and must stay dense.
 */
enum rxrpc_congest_change {
	rxrpc_cong_begin_retransmission = 0,
	rxrpc_cong_cleared_nacks,
	rxrpc_cong_new_low_nack,
	rxrpc_cong_no_change,
	rxrpc_cong_progress,
	rxrpc_cong_retransmit_again,
	rxrpc_cong_rtt_window_end,
	rxrpc_cong_saw_nack,
	rxrpc_congest__nr_change	/* Number of codes; sizes the name table */
};

/*
 * Name tables used when formatting trace output.  Each row is a fixed-width
 * character slot that is printed with %s, so every name must leave room for
 * its terminating NUL within the row width given here.
 *
 * rxrpc_congest_modes[] is indexed by enum rxrpc_congest_mode and
 * rxrpc_congest_changes[] by enum rxrpc_congest_change.
 */
extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];

extern const char *const rxrpc_pkts[];
/* Indexed by ACK reason.  The element type is plain "const char": the
 * qualifier used to be written twice, which leaves the type unchanged but
 * draws a duplicate-declaration-specifier warning from the compiler.
 */
extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];

Expand Down
36 changes: 34 additions & 2 deletions net/rxrpc/call_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,14 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
spin_unlock_bh(&call->lock);
}

/*
 * Handle congestion being detected by the retransmit timeout.
 *
 * Records the event by setting RXRPC_CALL_RETRANS_TIMEOUT in call->flags;
 * no other call state is touched here.  rxrpc_resend() calls this when it
 * has counted at least one packet whose annotation did not already carry
 * RXRPC_TX_ANNO_RESENT.
 *
 * NOTE(review): the flag is presumably consumed (and cleared) by the ACK
 * input path to fill in the ACK summary's retrans_timeo field - confirm
 * against the code that reads it.
 */
static void rxrpc_congestion_timeout(struct rxrpc_call *call)
{
set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
}

/*
* Perform retransmission of NAK'd and unack'd packets.
*/
Expand All @@ -154,9 +162,9 @@ static void rxrpc_resend(struct rxrpc_call *call)
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
rxrpc_seq_t cursor, seq, top;
ktime_t now = ktime_get_real(), max_age, oldest, resend_at;
ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts;
int ix;
u8 annotation, anno_type;
u8 annotation, anno_type, retrans = 0, unacked = 0;

_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);

Expand Down Expand Up @@ -193,10 +201,13 @@ static void rxrpc_resend(struct rxrpc_call *call)
oldest = skb->tstamp;
continue;
}
if (!(annotation & RXRPC_TX_ANNO_RESENT))
unacked++;
}

/* Okay, we need to retransmit a packet. */
call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
retrans++;
trace_rxrpc_retransmit(call, seq, annotation | anno_type,
ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
Expand All @@ -210,6 +221,25 @@ static void rxrpc_resend(struct rxrpc_call *call)
* reached the nsec timeout yet.
*/

if (unacked)
rxrpc_congestion_timeout(call);

/* If there was nothing that needed retransmission then it's likely
* that an ACK got lost somewhere. Send a ping to find out instead of
* retransmitting data.
*/
if (!retrans) {
rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_ns(ack_ts) < call->peer->rtt)
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
goto out;
}

/* Now go through the Tx window and perform the retransmissions. We
* have to drop the lock for each send. If an ACK comes in whilst the
* lock is dropped, it may clear some of the retransmission markers for
Expand Down Expand Up @@ -260,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call)

out_unlock:
spin_unlock_bh(&call->lock);
out:
_leave("");
}

Expand Down Expand Up @@ -293,6 +324,7 @@ void rxrpc_process_call(struct work_struct *work)
if (time_after_eq(now, call->expire_at)) {
rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}

if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
Expand Down
13 changes: 13 additions & 0 deletions net/rxrpc/call_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
call->rx_expect_next = 1;

if (RXRPC_TX_SMSS > 2190)
call->cong_cwnd = 2;
else if (RXRPC_TX_SMSS > 1095)
call->cong_cwnd = 3;
else
call->cong_cwnd = 4;
call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
return call;

nomem_2:
Expand All @@ -176,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
gfp_t gfp)
{
struct rxrpc_call *call;
ktime_t now;

_enter("");

Expand All @@ -185,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
call->service_id = srx->srx_service;
call->tx_phase = true;
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;

_leave(" = %p", call);
return call;
Expand Down Expand Up @@ -325,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
call->state = RXRPC_CALL_SERVER_ACCEPTING;
if (sp->hdr.securityIndex > 0)
call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;

/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
Expand Down
1 change: 1 addition & 0 deletions net/rxrpc/conn_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.info.maxMTU = htonl(mtu);
pkt.info.rwind = htonl(rxrpc_rx_window_size);
pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
len += sizeof(pkt.ack) + sizeof(pkt.info);
break;
}
Expand Down
Loading

0 comments on commit 5749434

Please sign in to comment.